Example #1
File: main.py Project: educa-labs/psupro
class Newton:
    '''
    area_ids - np.array of area ids.
    serialized_forests - str path to the folder with the serialized forests.
    serialized_tree - str path to the folder with the serialized tree.
    data_dir - str path to the data folder.
    n_forest_results - int number of results returned by the RF.
    k - int number of nearest neighbors computed by the BallTree.
    '''

    def __init__(self, area_ids, serialized_forests, serialized_tree, data_dir, cache=4, n_forest_results=3, k=5):
        self.area_ids = area_ids
        self.balltree = Tree(serialized_tree, data_dir)
        self.n_forest_results = n_forest_results
        self.k = k
        self.serialized_forests = serialized_forests
        self.cache = cache
        self.locks = {i: Lock() for i in area_ids}
        self.counters = {i: 0 for i in area_ids}
        self.active_forests = LRU(cache, callback=lambda key, value: clear(key, value, self.locks, self.counters))

    '''
    area_id - int id of the area to recommend for.
    scores - np.array (n,5) array of scores to recommend from.
    returns np.array (n,n_forest_results,k) of recommended degree programs.
    '''

    def get_recs(self, area_id, scores):
        prediction = self.predict(area_id, scores, self.n_forest_results)
        recommendations = []
        for carreer_set in prediction:
            recommendations.append(self.balltree.query(carreer_set, self.k))
        return np.array(recommendations)

    def predict(self, area_id, scores, n_results):
        with self.locks[area_id]:
            self.counters[area_id] += 1
        if not self.active_forests.has_key(area_id):
            if get_mem_percentage() < 0.3:
                last_key = self.active_forests.peek_last_item()[0]
                clear(last_key, self.active_forests[last_key], self.locks, self.counters)
            self.active_forests[area_id] = Forest(area_id, self.serialized_forests)
            # print(get_mem_percentage())
        forest = self.active_forests[area_id]
        prediction = forest.get_class(forest.query(scores, n_results))
        with self.locks[area_id]:
            self.counters[area_id] -= 1
        # print(self.active_forests.items())
        return prediction

    def filter_recs(self, user, carreers):
        pass
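The Newton class above relies on lru-dict's eviction callback: once the LRU holds `cache` forests, inserting a new one silently drops the least recently used entry and hands it to `clear`. A minimal standalone sketch of that mechanism, with a hypothetical `on_evict` standing in for the project's `clear` and plain strings standing in for Forest objects:

from lru import LRU

def on_evict(key, value):
    # Stand-in for clear(); in Newton this would release the evicted Forest.
    print("evicting", key)

forests = LRU(2, callback=on_evict)
forests["area-1"] = "forest-1"
forests["area-2"] = "forest-2"
forests["area-3"] = "forest-3"   # evicts "area-1" and calls on_evict("area-1", "forest-1")
print(forests.items())           # [('area-3', 'forest-3'), ('area-2', 'forest-2')]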
Example #2
import datetime
import logging

from lru import LRU

# Expects two module-level globals: `trace`, the sequence of requested object ids,
# and `dict`, a mapping from object id to object size in bytes.
def lru(size):
    hitCounter = missCounter = 0
    # Size is given in GB
    size = spaceLeft = int(size) * 1024 * 1024 * 1024
    hashmap = {}
    cache = LRU(size)
    for key in trace:
        if key in hashmap:
            # Hit: account for the bytes served and refresh the entry's recency
            hitCounter += int(dict[key])
            cache[key] = dict[key]
        else:
            missCounter += int(dict[key])
            if int(dict[key]) <= spaceLeft:
                # Miss, no eviction needed
                cache[key] = dict[key]
                hashmap[key] = dict[key]
                spaceLeft -= int(dict[key])
            else:
                # Miss: evict least recently used objects until the new one fits
                while int(dict[key]) > spaceLeft:
                    id = cache.peek_last_item()[0]
                    spaceLeft += int(hashmap[id])
                    del cache[id]
                    del hashmap[id]
                hashmap[key] = dict[key]
                cache[key] = dict[key]
                spaceLeft -= int(dict[key])
    now = datetime.datetime.now()
    print("Hit_Counter:" + str(hitCounter) + ",Miss_Counter:" + str(missCounter))
    print("Miss_Ratio:" + str(float(missCounter) / float(hitCounter + missCounter)))
    logging.info(str(now)[:19] + "\tHit_Counter:" + str(hitCounter) + ",Miss_Counter:" + str(missCounter))
    logging.info(str(now)[:19] + "\tMiss_Ratio:" + str(float(missCounter) / float(hitCounter + missCounter)))
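The function above assumes its inputs live in module-level globals. A minimal driver sketch, with hypothetical values and placed in the same module as lru():

trace = ["a", "b", "a", "c"]                  # request trace: object ids in arrival order
dict = {"a": "100", "b": "2048", "c": "512"}  # object id -> size in bytes (string values, as the code expects)
lru(1)                                        # simulate a 1 GB cache over the trace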
Example #3
def test_dict_behavior_matches_LRU_implementation():
    lru = LRU(100)
    lru_sql_dict = LRUSQLDict(
        sqlite3.connect(":memory:"),
        key_encoder,
        key_decoder,
        value_encoder,
        value_decoder,
        100,
    )
    kv_pairs = ((to_bytes(number), number) for number in range(20))
    for pair in kv_pairs:
        lru[pair[0]] = pair[1]
        lru_sql_dict[pair[0]] = pair[1]

        assert lru.peek_first_item() == (lru_sql_dict.head.key,
                                         lru_sql_dict.head.value)
        assert lru.peek_last_item() == (lru_sql_dict.tail.key,
                                        lru_sql_dict.tail.value)

    lru[to_bytes(10)]
    lru_sql_dict[to_bytes(10)]

    assert lru.peek_first_item() == (lru_sql_dict.head.key,
                                     lru_sql_dict.head.value)
    assert lru.peek_last_item() == (lru_sql_dict.tail.key,
                                    lru_sql_dict.tail.value)

    lru[to_bytes(15)] = 100
    lru_sql_dict[to_bytes(15)] = 100

    assert lru.peek_first_item() == (lru_sql_dict.head.key,
                                     lru_sql_dict.head.value)
    assert lru.peek_last_item() == (lru_sql_dict.tail.key,
                                    lru_sql_dict.tail.value)

    del lru[to_bytes(0)]
    del lru_sql_dict[to_bytes(0)]

    assert lru.peek_first_item() == (lru_sql_dict.head.key,
                                     lru_sql_dict.head.value)
    assert lru.peek_last_item() == (lru_sql_dict.tail.key,
                                    lru_sql_dict.tail.value)

    lru[to_bytes(100)] = 100
    lru_sql_dict[to_bytes(100)] = 100

    assert lru.peek_first_item() == (lru_sql_dict.head.key,
                                     lru_sql_dict.head.value)
    assert lru.peek_last_item() == (lru_sql_dict.tail.key,
                                    lru_sql_dict.tail.value)

    lru[to_bytes(5)]
    lru_sql_dict[to_bytes(5)]

    assert lru.peek_first_item() == (lru_sql_dict.head.key,
                                     lru_sql_dict.head.value)
    assert lru.peek_last_item() == (lru_sql_dict.tail.key,
                                    lru_sql_dict.tail.value)
Example #4
import logging

from lru import LRU

# Expects two module-level globals: `key`, the list of requested object ids in
# trace order, and `osize`, the corresponding object sizes in bytes.
def lru(ratio, output_file, data):
    hit = miss = 0
    # size = avail = float(data * ratio) / 100
    # divi = math.pow(2, ratio)
    size = avail = ratio * 1024 * 1024 * 1024 * 1024
    # size = float(data) / divi
    hashmap = {}
    avail = int(avail)
    cache = LRU(82170872)
    for i in range(len(key)):
        if key[i] in hashmap:
            # if i > 14915099:
            #     hit += int(osize[i])
            hit += int(osize[i])
            cache[key[i]] = osize[i]
        else:
            # if i > 14915099:
            #     miss += int(osize[i])
            miss += int(osize[i])
            if int(osize[i]) <= avail:
                # Miss, no eviction needed
                cache[key[i]] = "1"
                hashmap[key[i]] = int(osize[i])
                avail -= int(osize[i])
            else:
                # Miss: evict least recently used objects until the new one fits
                while int(osize[i]) > avail:
                    id = cache.peek_last_item()[0]
                    avail += int(hashmap[id])
                    del cache[id]
                    del hashmap[id]
                hashmap[key[i]] = osize[i]
                cache[key[i]] = "1"
                avail -= int(osize[i])
    fd = open("lru.res", "a")
    fd.write(str(hit) + "," + str(miss) + "\n")
    fd.close()
    logging.info("Hit Ratio:" + str(hit))
    logging.info("Miss Ratio:" + str(miss))
    # print(hit, ",", miss)
    print(float(miss) / float(hit + miss))
class complex_cache:
    def __init__(self, size, type): # the number of items
        self.size = size # actual size of the cache
        self.lru = LRU(size)

        self.hits = 0.0
        self.reqs = 0.0
        self.cache_stack_size = 0 # how much of the cache is occupied


    def place(self, request):
        # request is a tuple (timestamp, username)
        self.reqs += 1 
        if self.lru.has_key(request[-1]): 
            self.lru[request[-1]] = self.lru[request[-1]] + 1
            
            self.hits += 1            
        else:
            if self.cache_stack_size + 1 > self.size: 
                print "evict an item: "+str(self.lru.peek_last_item())
                self.cache_stack_size -= 1
                
            self.lru[request[-1]] = 1
            self.cache_stack_size += 1
Example #6
    def test_peek_last_item(self):
        l = LRU(2)
        self.assertEqual(None, l.peek_last_item())
        l[1] = '1'
        l[2] = '2'
        self.assertEqual((1, '1'), l.peek_last_item())
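A companion check for the MRU end of the cache, sketched against the same lru-dict API (peek_first_item returns the most recently used pair, and None when the cache is empty):

    def test_peek_first_item(self):
        l = LRU(2)
        self.assertEqual(None, l.peek_first_item())
        l[1] = '1'
        l[2] = '2'
        self.assertEqual((2, '2'), l.peek_first_item())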
Example #7
File: DND.py Project: tabzraz/RL
class DND:
    def __init__(self, kernel, num_neighbors, max_memory, embedding_size):
        # self.dictionary = LRUCache(max_memory)
        # self.kd_tree = kdtree.create(dimensions=embedding_size)
        # rnd_projection = RandomBinaryProjections("RBP", 8)
        # distance = EuclideanDistance()
        # nearest = NearestFilter(num_neighbors)
        # self.nearpy = Engine(dim=embedding_size, lshashes=[rnd_projection], distance=distance, vector_filters=[nearest], fetch_vector_filters=[])

        self.kd_tree = None
        # self.data = []

        # self.lshash = LSHash(hash_size=embedding_size, input_dim=embedding_size, num_hashtables=10)
        self.lru = LRU(size=max_memory)

        self.num_neighbors = num_neighbors
        self.kernel = kernel
        self.max_memory = max_memory
        self.embedding_size = embedding_size
        # self.keys_added = []

    def is_present(self, key):
        return tuple(key) in self.lru  # self.lru.has_key(tuple(key))
        # return self.dictionary.get(tuple(key)) is not None
        # return self.dictionary.get(tuple(key.data.cpu().numpy()[0])) is not None

    def get_value(self, key):
        return self.lru[tuple(key)]
        # return self.dictionary.get(tuple(key))
        # return self.dictionary.get(tuple(key.data.cpu().numpy()[0]))

    def lookup(self, lookup_key):
        # TODO: Speed up search knn
        # keys = [key[0].data for key in self.kd_tree.search_knn(lookup_key, self.num_neighbors)]
        lookup_key_numpy = lookup_key.data[0].numpy()
        # lookup_key_tuple = tuple(lookup_key_numpy)
        # print(lookup_key)

        # keys = [key[0] for key in self.lshash.query_no_data(lookup_key_numpy, num_results=self.num_neighbors)]
        # keys = [key[1] for key in self.nearpy.neighbours(lookup_key_numpy)]
        if self.kd_tree is not None:
            # print(len(self.lru.keys()), lookup_key_numpy)
            # things_distances, things_index = self.kd_tree.query(lookup_key_numpy, k=self.num_neighbors, eps=1.0)
            things_index = self.kd_tree.query([lookup_key_numpy],
                                              k=min(self.num_neighbors,
                                                    len(self.kd_tree.data)),
                                              return_distance=False,
                                              sort_results=False)
            # print(things_index)
            keys = [self.lru.keys()[jj] for jj in things_index[0]]
            # print(keys)
        else:
            keys = []

        # print(keys)
        # print(keys)
        # output, kernel_sum = Variable(FloatTensor([0])), Variable(FloatTensor([0]))
        output, kernel_sum = 0, 0
        # if len(keys) != 0:
        # print(keys)
        # TODO: Speed this up since the kernel takes a significant amount of time
        for key in keys:
            # print("Key:",key, lookup_key)
            # if not np.allclose(key, lookup_key_numpy): #(key == lookup_key).data.all():
            if not np.all(key == lookup_key_numpy):
                # print("Here")
                # gg = Variable(FloatTensor(np.array(key)))
                # print(key)
                # gg = Variable(FloatTensor(key))
                gg = Variable(torch.from_numpy(np.array(key)))
                # print(tuple(key))
                # hh = lookup_key[0] - gg
                # print("Key:", gg, "Lookup key", lookup_key[0])
                # print(lookup_key[0] + gg)
                kernel_val = self.kernel(gg, lookup_key[0])
                # print("key:", self.lru.get(tuple(key)))
                # if not self.lru.has_key(tuple(key)):
                # print(keys)
                # print(tuple(key))
                # print(key in self.keys_added)
                # print(len(self.lru))
                # if tuple(key) not in self.lru:
                # print("NOT IN:", tuple(key))
                # print(len(keys))
                output += kernel_val * self.lru.get(tuple(key))
                # output += kernel_val * self.dictionary.get(tuple(key))
                # print("Key", key.requires_grad, key.volatile)
                # print("Kernel key", self.kernel(key, lookup_key).requires_grad)
                # print("Output in loop", output.requires_grad)
                kernel_sum += kernel_val  #self.kernel(key, lookup_key)
                # print(kernel_sum)
        # if len(keys) == 0:
        #     return (lookup_key * 0)[0][0]
        if isinstance(kernel_sum, int):
            return (lookup_key * 0)[0][0]
        # if kernel_sum == 0:
        # print("0 Kernel", kernel_sum)
        # if len(keys) == 0:
        # print("0 keys", len(keys))
        if kernel_sum.data[0] == 0 or len(keys) == 0:
            # print(lookup_key)
            # zeroed = (lookup_key * 0)[0][0]
            # print("Zero Lookup.", output.data, kernel_sum.data, len(keys))
            return (lookup_key * 0)[0][0]
        # print("lookup_key", lookup_key.requires_grad, lookup_key.volatile)
        # print("kernled", self.kernel(keys[0], lookup_key).requires_grad)
        # print("output", output.requires_grad, output.volatile)
        # print("ks", kernel_sum.requires_grad, kernel_sum.volatile)
        # print("Non-Zero Lookup for {}".format(lookup_key))
        output = output / kernel_sum
        # print(output)
        return output

    def upsert(self, key, value):
        # key = key.data[0].numpy()
        # print(key)
        # self.keys_added.append(key)
        # if not self.lru.has_key(tuple(key)):# self.is_present(key):
        # self.kd_tree.add(key)
        # print("Key going in", key)
        # self.lshash.index(input_point=key)
        # self.nearpy.store_vector(key, data=key)

        # print("Adding", tuple(key), key)
        # neighbours = self.nearpy.neighbours(key)
        # print(neighbours)

        self.lru[tuple(key)] = value
        # self.kd_tree = KDTree(data=self.lru.keys(), compact_nodes=False, copy_data=False, balanced_tree=False)
        self.kd_tree = KDTree(self.lru.keys())

        return
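        # NOTE: the bare return above exits upsert here, so the capacity-handling
        # branch below is unreachable dead code.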
        if len(self.lru) == self.max_memory:
            # Expel least recently used key from self.dictionary and self.kd_tree if memory used is at capacity
            # deleted_key = self.dictionary.delete_least_recently_used()[0]
            # deleted_key = self.lru.peek_last_item()[0]
            # print("Deleted key:",deleted_key)
            # deleted_key = np.array(deleted_key)
            # thing = Variable(torch.from_numpy(deleted_key).float()).unsqueeze(0)
            # thing = Variable(FloatTensor(deleted_key)).unsqueeze(0)
            # print("Thing:",thing)
            # print(self.dictionary.cache.keys())
            key_to_delete = self.lru.peek_last_item()
            self.lru[tuple(key)] = value
            # self.kd_tree.remove(Variable(FloatTensor(deleted_key)).unsqueeze(0))
            # self.kd_tree.remove(deleted_key)

            # Remake the LSHASH with the deleted key
            # print("remaking")

            # self.lshash = LSHash(hash_size=self.embedding_size, input_dim=self.embedding_size)
            # for k in self.lru.keys():
            #     self.lshash.index(np.array(k))

            # print("Deleting", np.array(key_to_delete[0]))
            # self.nearpy.delete_vector(key_to_delete[0])
            # self.nearpy.clean_all_buckets()
            # for k in self.lru.keys():
            # self.nearpy.store_vector(np.array(k))

            # Checking that the lru keys are the same as the keys in the lshash
            # for key in self.lru.keys():
            #     keys_close = [key[0] for key in self.lshash.query(key, num_results=5)]
            #     # print(keys_close)
            #     for kk in keys_close:
            #         if kk not in self.lru:
            #             print("\n\nProblems! Key in LSHASH not in LRU\n\n")

            # Check length of all lru keys
            # all_lru_keys = self.lshash.query(key)
            # print("\n", len(all_lru_keys), "\n")
        else:
            self.lru[tuple(key)] = value

        self.kdtree = KDTree(self.data)
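DND.lookup above returns a kernel-weighted average of the stored values of the nearest neighbours, output = sum_i k(h, h_i) * v_i / sum_i k(h, h_i). A minimal numpy sketch of that computation; the inverse-distance kernel here is a hypothetical stand-in for whatever kernel is passed to DND.__init__:

import numpy as np

def inverse_distance_kernel(a, b, delta=1e-3):
    # Hypothetical kernel; DND takes the kernel as a constructor argument.
    return 1.0 / (np.sum((a - b) ** 2) + delta)

stored_keys = [np.array([0.0, 0.0]), np.array([1.0, 1.0])]
stored_values = np.array([1.0, 3.0])
query = np.array([0.1, 0.1])

weights = np.array([inverse_distance_kernel(query, k) for k in stored_keys])
print(weights @ stored_values / weights.sum())  # weighted toward the closer key's value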
Example #8
class FileServer(fileService_pb2_grpc.FileserviceServicer):
    def __init__(self, hostname, server_port, activeNodesChecker,
                 shardingHandler, superNodeAddress):
        self.serverPort = server_port
        self.serverAddress = hostname + ":" + server_port
        self.activeNodesChecker = activeNodesChecker
        self.shardingHandler = shardingHandler
        self.hostname = hostname
        self.lru = LRU(5)
        self.superNodeAddress = superNodeAddress

    #
    #   This service gets invoked when user uploads a new file.
    #
    def UploadFile(self, request_iterator, context):
        print("Inside Server method ---------- UploadFile")
        data = bytes("", 'utf-8')
        username, filename = "", ""
        totalDataSize = 0
        active_ip_channel_dict = self.activeNodesChecker.getActiveChannels()

        # list to store the info related to file location.
        metaData = []

        # If the node is the leader of the cluster.
        if (int(db.get("primaryStatus")) == 1):
            print("Inside primary upload")
            currDataSize = 0
            currDataBytes = bytes("", 'utf-8')
            seqNo = 1

            # Step 1:
            # Get 2 least loaded nodes based on the CPU stats.
            # 'Node' is where the actual data goes and 'node_replica' is where replica will go.
            node, node_replica = self.getLeastLoadedNode()

            if (node == -1):
                return fileService_pb2.ack(
                    success=False,
                    message="Error Saving File. No active nodes.")

            # Step 2:
            # Check whether file already exists, if yes then return with message 'File already exists'.
            for request in request_iterator:
                username, filename = request.username, request.filename
                print("Key is-----------------", username + "_" + filename)
                if (self.fileExists(username, filename) == 1):
                    print("sending neg ack")
                    return fileService_pb2.ack(
                        success=False,
                        message=
                        "File already exists for this user. Please rename or delete file first."
                    )
                break

            # Step 3:
            # Make chunks of size 'UPLOAD_SHARD_SIZE' and start sending the data to the least utilized node through gRPC streaming.
            currDataSize += sys.getsizeof(request.data)
            currDataBytes += request.data

            for request in request_iterator:

                if ((currDataSize + sys.getsizeof(request.data)) >
                        UPLOAD_SHARD_SIZE):
                    response = self.sendDataToDestination(
                        currDataBytes, node, node_replica, username, filename,
                        seqNo, active_ip_channel_dict[node])
                    metaData.append([node, seqNo, node_replica])
                    currDataBytes = request.data
                    currDataSize = sys.getsizeof(request.data)
                    seqNo += 1
                    node, node_replica = self.getLeastLoadedNode()
                else:
                    currDataSize += sys.getsizeof(request.data)
                    currDataBytes += request.data

            if (currDataSize > 0):
                response = self.sendDataToDestination(
                    currDataBytes, node, node_replica, username, filename,
                    seqNo, active_ip_channel_dict[node])
                metaData.append([node, seqNo, node_replica])

            # Step 4:
            # Save the metadata on the primary node after the completion of sharding.
            if (response.success):
                db.saveMetaData(username, filename, metaData)
                db.saveUserFile(username, filename)

            # Step 5:
            # Make a gRPC call to replicate the metadata on all the other nodes.
            self.saveMetadataOnAllNodes(username, filename, metaData)

            return fileService_pb2.ack(success=True, message="Saved")

        # If the node is not the leader.
        else:
            print("Saving the data on my local db")
            sequenceNumberOfChunk = 0
            dataToBeSaved = bytes("", 'utf-8')

            # Gather all the data from gRPC stream
            for request in request_iterator:
                username, filename, sequenceNumberOfChunk = request.username, request.filename, request.seqNo
                dataToBeSaved += request.data
            key = username + "_" + filename + "_" + str(sequenceNumberOfChunk)

            # Save the data in local DB.
            db.setData(key, dataToBeSaved)

            # After saving the chunk in the local DB, make a gRPC call to save the replica of the chunk on different
            # node only if the replicaNode is present.
            if (request.replicaNode != ""):
                print("Sending replication to ", request.replicaNode)
                replica_channel = active_ip_channel_dict[request.replicaNode]
                t1 = Thread(target=self.replicateChunkData,
                            args=(
                                replica_channel,
                                dataToBeSaved,
                                username,
                                filename,
                                sequenceNumberOfChunk,
                            ))
                t1.start()
                # stub = fileService_pb2_grpc.FileserviceStub(replica_channel)
                # response = stub.UploadFile(self.sendDataInStream(dataToBeSaved, username, filename, sequenceNumberOfChunk, ""))

            return fileService_pb2.ack(success=True, message="Saved")

    def replicateChunkData(self, replica_channel, dataToBeSaved, username,
                           filename, sequenceNumberOfChunk):
        stub = fileService_pb2_grpc.FileserviceStub(replica_channel)
        response = stub.UploadFile(
            self.sendDataInStream(dataToBeSaved, username, filename,
                                  sequenceNumberOfChunk, ""))

    # This helper method is responsible for sending the data to destination node through gRPC stream.
    def sendDataToDestination(self, currDataBytes, node, nodeReplica, username,
                              filename, seqNo, channel):
        if (node == self.serverAddress):
            key = username + "_" + filename + "_" + str(seqNo)
            db.setData(key, currDataBytes)
            if (nodeReplica != ""):
                print("Sending replication to ", nodeReplica)
                active_ip_channel_dict = self.activeNodesChecker.getActiveChannels(
                )
                replica_channel = active_ip_channel_dict[nodeReplica]
                stub = fileService_pb2_grpc.FileserviceStub(replica_channel)
                response = stub.UploadFile(
                    self.sendDataInStream(currDataBytes, username, filename,
                                          seqNo, ""))
                return response
        else:
            print("Sending the UPLOAD_SHARD_SIZE to node :", node)
            stub = fileService_pb2_grpc.FileserviceStub(channel)
            response = stub.UploadFile(
                self.sendDataInStream(currDataBytes, username, filename, seqNo,
                                      nodeReplica))
            print("Response from uploadFile: ", response.message)
            return response

    # This helper method makes chunks of less than 4 MB and streams them through gRPC.
    # 4 MB is the maximum message size gRPC will send in one call, which is why the chunking is necessary.
    def sendDataInStream(self, dataBytes, username, filename, seqNo,
                         replicaNode):
        chunk_size = 4000000
        start, end = 0, chunk_size
        while (True):
            chunk = dataBytes[start:end]
            if (len(chunk) == 0): break
            start = end
            end += chunk_size
            yield fileService_pb2.FileData(username=username,
                                           filename=filename,
                                           data=chunk,
                                           seqNo=seqNo,
                                           replicaNode=replicaNode)

    #
    #   This service gets invoked when user requests an uploaded file.
    #
    def DownloadFile(self, request, context):

        print("Inside Download")

        # If the node is the leader of the cluster.
        if (int(db.get("primaryStatus")) == 1):

            print("Inside primary download")

            # Check if file exists
            if (self.fileExists(request.username, request.filename) == 0):
                print("File does not exist")
                yield fileService_pb2.FileData(username=request.username,
                                               filename=request.filename,
                                               data=bytes("", 'utf-8'),
                                               seqNo=0)
                return

            # If the file is present in cache then just fetch it and return. No need to go to individual node.
            if (self.lru.has_key(request.username + "_" + request.filename)):
                print("Fetching data from Cache")
                CHUNK_SIZE = 4000000
                fileName = request.username + "_" + request.filename
                filePath = self.lru[fileName]
                outfile = os.path.join(filePath, fileName)

                with open(outfile, 'rb') as infile:
                    while True:
                        chunk = infile.read(CHUNK_SIZE)
                        if not chunk: break
                        yield fileService_pb2.FileData(
                            username=request.username,
                            filename=request.filename,
                            data=chunk,
                            seqNo=1)

            # If the file is not present in the cache, then fetch it from the individual node.
            else:
                print("Fetching the metadata")

                # Step 1: get metadata i.e. the location of chunks.
                metaData = db.parseMetaData(request.username, request.filename)

                print(metaData)

                #Step 2: make gRPC calls and get the fileData from all the nodes.
                downloadHelper = DownloadHelper(self.hostname, self.serverPort,
                                                self.activeNodesChecker)
                data = downloadHelper.getDataFromNodes(request.username,
                                                       request.filename,
                                                       metaData)
                print("Sending the data to client")

                #Step 3: send the file to supernode using gRPC streaming.
                chunk_size = 4000000
                start, end = 0, chunk_size
                while (True):
                    chunk = data[start:end]
                    if (len(chunk) == 0): break
                    start = end
                    end += chunk_size
                    yield fileService_pb2.FileData(username=request.username,
                                                   filename=request.filename,
                                                   data=chunk,
                                                   seqNo=request.seqNo)

                # Step 4: update the cache based on LRU(least recently used) algorithm.
                self.saveInCache(request.username, request.filename, data)

        # If the node is not the leader, then just fetch the fileChunk from the local db and stream it back to leader.
        else:
            key = request.username + "_" + request.filename + "_" + str(
                request.seqNo)
            print(key)
            data = db.getFileData(key)
            chunk_size = 4000000
            start, end = 0, chunk_size
            while (True):
                chunk = data[start:end]
                if (len(chunk) == 0): break
                start = end
                end += chunk_size
                yield fileService_pb2.FileData(username=request.username,
                                               filename=request.filename,
                                               data=chunk,
                                               seqNo=request.seqNo)

    # This service is responsible fetching all the files.
    def FileList(self, request, context):
        print("File List Called")
        userFiles = db.getUserFiles(request.username)
        return fileService_pb2.FileListResponse(Filenames=str(userFiles))

    # This helper method checks whether the file is present in db or not.
    def fileExists(self, username, filename):
        print("isFile Present", db.keyExists(username + "_" + filename))
        return db.keyExists(username + "_" + filename)

    # This helper method returns 2 least loaded nodes from the cluster.
    def getLeastLoadedNode(self):
        print("Ready to enter sharding handler")
        node, node_replica = self.shardingHandler.leastUtilizedNode()
        print("Least loaded node is :", node)
        print("Replica node - ", node_replica)
        return node, node_replica

    # This helper method replicates the metadata on all nodes.
    def saveMetadataOnAllNodes(self, username, filename, metadata):
        print("saveMetadataOnAllNodes")
        active_ip_channel_dict = self.activeNodesChecker.getActiveChannels()
        uniqueFileName = username + "_" + filename
        for ip, channel in active_ip_channel_dict.items():
            if (self.isChannelAlive(channel)):
                stub = fileService_pb2_grpc.FileserviceStub(channel)
                response = stub.MetaDataInfo(
                    fileService_pb2.MetaData(
                        filename=uniqueFileName,
                        seqValues=str(metadata).encode('utf-8')))
                print(response.message)

    # This service is responsible for saving the metadata on local db.
    def MetaDataInfo(self, request, context):
        print("Inside Metadatainfo")
        fileName = request.filename
        seqValues = request.seqValues
        db.saveMetaDataOnOtherNodes(fileName, seqValues)
        ack_message = "Successfully saved the metadata on " + self.serverAddress
        return fileService_pb2.ack(success=True, message=ack_message)

    # This helper method checks whether the created channel is alive or not
    def isChannelAlive(self, channel):
        try:
            grpc.channel_ready_future(channel).result(timeout=1)
        except grpc.FutureTimeoutError:
            #print("Connection timeout. Unable to connect to port ")
            return False
        return True

    # This helper method is responsible for updating the cache for faster lookup.
    def saveInCache(self, username, filename, data):
        if (len(self.lru.items()) >= self.lru.get_size()):
            fileToDel, path = self.lru.peek_last_item()
            os.remove(path + "/" + fileToDel)

        self.lru[username + "_" + filename] = "cache"
        filePath = os.path.join('cache', username + "_" + filename)
        saveFile = open(filePath, 'wb')
        saveFile.write(data)
        saveFile.close()

    # This service is responsible for sending the whole cluster stats to superNode
    def getClusterStats(self, request, context):
        print("Inside getClusterStats")
        active_ip_channel_dict = self.activeNodesChecker.getActiveChannels()
        total_cpu_usage, total_disk_space, total_used_mem = 0.0, 0.0, 0.0
        total_nodes = 0
        for ip, channel in active_ip_channel_dict.items():
            if (self.isChannelAlive(channel)):
                stub = heartbeat_pb2_grpc.HearBeatStub(channel)
                stats = stub.isAlive(heartbeat_pb2.NodeInfo(ip="", port=""))
                total_cpu_usage = float(stats.cpu_usage)
                total_disk_space = float(stats.disk_space)
                total_used_mem = float(stats.used_mem)
                total_nodes += 1

        if (total_nodes == 0):
            return fileService_pb2.ClusterStats(cpu_usage=str(100.00),
                                                disk_space=str(100.00),
                                                used_mem=str(100.00))

        return fileService_pb2.ClusterStats(
            cpu_usage=str(total_cpu_usage / total_nodes),
            disk_space=str(total_disk_space / total_nodes),
            used_mem=str(total_used_mem / total_nodes))

    # This service is responsible for sending the leader info to superNode as soon as leader changes.
    def getLeaderInfo(self, request, context):
        channel = grpc.insecure_channel('{}'.format(self.superNodeAddress))
        stub = fileService_pb2_grpc.FileserviceStub(channel)
        response = stub.getLeaderInfo(
            fileService_pb2.ClusterInfo(ip=self.hostname,
                                        port=self.serverPort,
                                        clusterName="team1"))
        print(response.message)

    #
    #   This service gets invoked when user deletes a file.
    #
    def FileDelete(self, request, data):
        username = request.username
        filename = request.filename

        if (int(db.get("primaryStatus")) == 1):

            if (self.fileExists(username, filename) == 0):
                print("File does not exist")
                return fileService_pb2.ack(success=False,
                                           message="File does not exist")

            print("Fetching metadata from leader")
            metadata = db.parseMetaData(request.username, request.filename)
            print("Successfully retrieved metadata from leader")

            deleteHelper = DeleteHelper(self.hostname, self.serverPort,
                                        self.activeNodesChecker)
            deleteHelper.deleteFileChunksAndMetaFromNodes(
                username, filename, metadata)

            return fileService_pb2.ack(
                success=True,
                message="Successfully deleted file from the cluster")

        else:
            seqNo = -1

            try:
                seqNo = request.seqNo
            except:
                return fileService_pb2.ack(success=False,
                                           message="Internal Error")

            metaDataKey = username + "_" + filename
            dataChunkKey = username + "_" + filename + "_" + str(seqNo)

            if (db.keyExists(metaDataKey) == 1):
                print("FileDelete: Deleting the metadataEntry from local db :")
                db.deleteEntry(metaDataKey)
            if (db.keyExists(dataChunkKey)):
                print("FileDelete: Deleting the data chunk from local db: ")
                db.deleteEntry(dataChunkKey)

            return fileService_pb2.ack(
                success=True,
                message="Successfully deleted file from the cluster")

    #
    #   This service gets invoked when user wants to check if the file is present.
    #
    def FileSearch(self, request, data):
        username, filename = request.username, request.filename

        if (self.fileExists(username, filename) == 1):
            return fileService_pb2.ack(success=True,
                                       message="File exists in the cluster.")
        else:
            return fileService_pb2.ack(
                success=False, message="File does not exist in the cluster.")

    #
    #   This service gets invoked when user wants to update a file.
    #
    def UpdateFile(self, request_iterator, context):

        username, filename = "", ""
        fileData = bytes("", 'utf-8')

        for request in request_iterator:
            fileData += request.data
            username, filename = request.username, request.filename

        def getFileChunks(fileData):
            # Maximum chunk size that can be sent over gRPC
            CHUNK_SIZE = 4000000

            sTime = time.time()

            # fileData is a bytes object, so slice it into chunks rather than read() it
            start, end = 0, CHUNK_SIZE
            while True:
                chunk = fileData[start:end]
                if not chunk: break
                start = end
                end += CHUNK_SIZE

                yield fileService_pb2.FileData(username=username,
                                               filename=filename,
                                               data=chunk,
                                               seqNo=1)
            print("Time for upload= ", time.time() - sTime)

        if (int(db.get("primaryStatus")) == 1):
            channel = grpc.insecure_channel('{}'.format(self.serverAddress))
            stub = fileService_pb2_grpc.FileserviceStub(channel)

            response1 = stub.FileDelete(
                fileService_pb2.FileInfo(username=username, filename=filename))

            if (response1.success):
                response2 = stub.UploadFile(getFileChunks(fileData))
                if (response2.success):
                    return fileService_pb2.ack(
                        success=True, message="File suceessfully updated.")
                else:
                    return fileService_pb2.ack(success=False,
                                               message="Internal error.")
            else:
                return fileService_pb2.ack(success=False,
                                           message="Internal error.")
Example #9
class Cache:

    # Replacement policies
    LRU = "LRU"
    FIFO = 'FIFO'

    def __init__(self, name, size, policy):
        self.name = name
        self.size = size
        self.free_space = size
        self.policy = policy  # Eviction policy
        self.hashmap = {}  # Mapping <objname,objsize>

        if (self.policy == Cache.LRU):
            self.cache = LRU(self.size)
        elif (self.policy == Cache.FIFO):
            self.cache = queue.Queue(maxsize=self.size)

        # Statistics
        self.hit_count = 0
        self.miss_count = 0

    def has_key(self, key):
        if key in self.hashmap.keys():
            return True
        else:
            return False

    def update(self, key, size):
        self.hashmap[key] = size
        self.hit_count += 1
        if (self.policy == Cache.LRU):
            self.cache[key] = size
        elif (self.policy == Cache.FIFO):
            self.cache.put(key)

    def insert(self, key, size, directory):
        if (self.policy == Cache.LRU):
            self.insertLRU(key, size, directory)
        elif (self.policy == Cache.FIFO):
            self.insertFIFO(key, size, directory)

    def evictLRU(self, directory):
        oid = self.cache.peek_last_item()[0]
        directory.removeBlock(oid, self.name)
        self.free_space += int(self.hashmap[oid])
        del self.cache[oid]
        del self.hashmap[oid]

    def evictFIFO(self, directory):
        oid = self.cache.get()
        directory.removeBlock(oid, self.name)
        self.free_space += int(self.hashmap[oid])
        del self.hashmap[oid]

    def insertLRU(self, key, size, directory):
        while (int(size) >= self.free_space):
            self.evictLRU(directory)
        self.cache[key] = size
        self.hashmap[key] = size
        self.free_space -= int(size)
        self.miss_count += 1

    def insertFIFO(self, key, size, directory):
        while (int(size) >= self.free_space):
            self.evictFIFO(directory)
        self.cache.put(key)
        self.hashmap[key] = size
        self.free_space -= int(size)
        self.miss_count += 1

    def put(self, key, size, directory):
        if self.has_key(key):
            self.update(key, size)
        else:
            self.insert(key, size, directory)

    def print(self):
        if (self.policy == Cache.LRU):
            print(self.name, "LRU", self.hashmap, self.cache.items())
        elif (self.policy == Cache.FIFO):
            print(self.name, "LRU", self.hashmap, list(self.cache.queue))

    def remove(self, key):
        del self.hashmap[key]
        if (self.policy == Cache.LRU):
            del self.cache[key]
        elif (self.policy == Cache.FIFO):
            a = 5
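A minimal driver sketch for the Cache class above; the Directory stub and the numbers are hypothetical. A repeated put on the same key counts a hit, while a new key counts a miss and consumes free space:

class _Directory:
    # Stand-in for the directory object; only the method used on eviction is stubbed.
    def removeBlock(self, oid, cache_name):
        pass

c = Cache("l1-cache", size=100, policy=Cache.LRU)
c.put("obj1", 40, _Directory())   # miss: inserted, free space drops by 40
c.put("obj1", 40, _Directory())   # hit: entry refreshed as most recently used
print(c.hit_count, c.miss_count)  # 1 1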
Example #10
from lru import LRU
l = LRU(5)  # Create an LRU container that can hold 5 items

print(l.peek_first_item(), l.peek_last_item())  # Return the MRU and LRU items (key, value)
# Would print None None

for i in range(5):
    l[i] = str(i)
print(l.items())  # Prints items in MRU order
# Would print [(4, '4'), (3, '3'), (2, '2'), (1, '1'), (0, '0')]

print(l.peek_first_item(), l.peek_last_item())  # Return the MRU and LRU items (key, value)
# Would print (4, '4') (0, '0')

l[5] = '5'  # Inserting one more item should evict the old item
print(l.items())
# Would print [(5, '5'), (4, '4'), (3, '3'), (2, '2'), (1, '1')]

l[3]  # Accessing an item would make it MRU
print(l.items())
# Would print [(3, '3'), (5, '5'), (4, '4'), (2, '2'), (1, '1')]
# Now 3 is in front

print(l.keys())  # Can get keys alone in MRU order
# Would print [3, 5, 4, 2, 1]

del l[4]  # Delete an item
print(l.items())
# Would print [(3, '3'), (5, '5'), (2, '2'), (1, '1')]

print(l.get_size())
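As a short follow-up on the same l, sketched against lru-dict's set_size (which Example #11 also uses): the capacity can be changed at runtime, and shrinking it evicts from the LRU end.

l.set_size(3)     # keep only the 3 most recently used items
print(l.items())
# Would print [(3, '3'), (5, '5'), (2, '2')]

print(l.get_size())
# Would print 3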
Example #11
File: cache.py Project: ekaynar/d3nSim
class Cache:
    """Class representing D3N."""

    # Replacement policies
    LRU = "LRU"
    LFU = "LFU"
    LRU_S = "LRU_S"
    FIFO = "FIFO"
    RAND = "RAND"

    # Write policies
    WRITE_BACK = "WB"
    WRITE_THROUGH = "WT"

    # Layer
    L1 = "L1"
    L2 = "L2"

    consistent = "consistent"
    rendezvous = "rendezvous"
    rr = "rr"

    def __init__(self, layer, size, replace_pol, write_pol, hash_ring,
                 hash_type, obj_size, full_size, logger):
        self._replace_pol = replace_pol  # Replacement policy
        self._write_pol = write_pol  # Write policy
        self._layer = layer  # Layer info
        self._size = size  # Cache size
        self.spaceLeft = size  # Cache size
        self._logger = logger
        self.hashmap = {}  # Mapping
        self.hash_ring = hash_ring
        self._hash_type = hash_type
        self._obj_size = obj_size

        if (self._size == 0):
            self.zerosize = True
            self._size = 1
        else:
            self.zerosize = False

        if (self._replace_pol == Cache.LRU):
            self.cache = LRU(self._size)
        elif (self._replace_pol == Cache.FIFO):
            self.cache = deque()
        elif (self._replace_pol == Cache.LRU_S):
            self.cache = LRU(self._size)
            self.shadow = LRU(full_size)
            self.hist = []
            for i in range(full_size):
                self.hist.append(0)

        # Statistics
        self._hit_count = 0
        self._miss_count = 0
        self._backend_bw = 0
        self._crossrack_bw = 0
        self._intrarack_bw = 0
        self.miss_lat = 0
        self.lat_count = 0

    def _insert1(self, key, size):
        # No eviction
        if not self.zerosize:
            if (self._replace_pol == Cache.LRU_S):
                self.shadow[key] = 1

            if (int(size) <= self.spaceLeft):
                if (self._replace_pol == Cache.LRU):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.LRU_S):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.FIFO):
                    self.cache.append(key)
                self.hashmap[key] = int(size)
                self.spaceLeft -= int(size)
            else:
                while (int(size) > self.spaceLeft):
                    self._evict()
                if (self._replace_pol == Cache.LRU):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.LRU_S):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.FIFO):
                    self.cache.append(key)
                self.hashmap[key] = int(size)
                self.spaceLeft -= int(size)

    def _insert(self, key, size):
        # No eviction
        if not self.zerosize:
            if (self._replace_pol == Cache.LRU_S):
                self.cache[key] = int(size)
                self.shadow[key] = int(size)
            elif (self._replace_pol == Cache.LRU):
                self.cache[key] = int(size)
            else:
                if (int(size) <= self.spaceLeft):
                    if (self._replace_pol == Cache.LRU):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.LRU_S):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.FIFO):
                        self.cache.append(key)
                    self.hashmap[key] = int(size)
                    self.spaceLeft -= int(size)
                else:
                    while (int(size) > self.spaceLeft):
                        self._evict()
                    if (self._replace_pol == Cache.LRU):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.LRU_S):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.FIFO):
                        self.cache.append(key)
                    self.hashmap[key] = int(size)
                    self.spaceLeft -= int(size)

    def read1(self, key, size):
        if self._layer == "BE":
            return 1
        if self.zerosize == True:
            return None
        """Read a object from the cache."""
        r = None

        if (self._replace_pol == Cache.LRU_S):
            if self.shadow.has_key(key):
                count = 0
                for i in self.shadow.keys():
                    if i == key:
                        self.hist[count] += 1
                        break
                    count += 1
                self.shadow[key] = 1

        if key in self.hashmap:
            if (self._replace_pol == Cache.LRU):
                self._update_use(key)
            elif (self._replace_pol == Cache.LRU_S):
                self._update_use(key)
            self._hit_count += 1
            r = 1
        else:
            self._miss_count += 1
        return r

    def read(self, key, size):
        if self._layer == "BE":
            return 1
        if self.zerosize == True:
            return None
        """Read a object from the cache."""
        r = None

        if (self._replace_pol == Cache.LRU_S):
            if self.cache.has_key(key):
                self._hit_count += 1
                self.cache[key] = self.cache[key]
                r = 1
            else:
                self._miss_count += 1

            if self.shadow.has_key(key):
                count = 0
                for i in self.shadow.keys():
                    if i == key:
                        self.hist[count] += 1
                        break
                    count += 1
                self.shadow[key] = 1

        else:
            if key in self.hashmap:
                if (self._replace_pol == Cache.LRU):
                    self._update_use(key)
                elif (self._replace_pol == Cache.LRU_S):
                    self._update_use(key)
                self._hit_count += 1
                r = 1
            else:
                self._miss_count += 1
        return r

    def checkKey(self, key):
        if self._layer == "BE":
            return 1
        if self.zerosize == True:
            return 0
        """Read a object from the cache."""
        r = 0

        if (self._replace_pol == Cache.LRU_S) or (self._replace_pol
                                                  == Cache.LRU):
            if self.cache.has_key(key):
                r = 1
            else:
                r = 0
        return r

    def _evict(self):
        if (self._replace_pol == Cache.LRU):
            id = self.cache.peek_last_item()[0]
            del self.cache[id]
        elif (self._replace_pol == Cache.LRU_S):
            id = self.cache.peek_last_item()[0]
            del self.cache[id]
        elif (self._replace_pol == Cache.FIFO):
            id = self.cache.popleft()
        self.spaceLeft += int(self.hashmap[id])
        del self.hashmap[id]

    def _update_use(self, key):
        """Update the use of a cache."""
        if (self._replace_pol == Cache.LRU):
            self.cache[key] = self.hashmap[key]
        if (self._replace_pol == Cache.LRU_S):
            self.cache[key] = self.hashmap[key]

    def set_cache_size(self, size):
        new_size = self.cache.get_size() + int(size)
        self.cache.set_size(int(new_size))

    def set_backend_bw(self, value):
        self._backend_bw += value

    def set_crossrack_bw(self, value):
        self._crossrack_bw += value

    def set_intrarack_bw(self, value):
        self._intrarack_bw += value

    def get_backend_bw(self):
        return self._backend_bw

    def get_crossrack_bw(self):
        return self._crossrack_bw

    def get_intrarack_bw(self):
        return self._intrarack_bw

    def get_replace_pol(self):
        return self._replace_pol

    def get_hit_count(self):
        return self._hit_count

    def get_miss_count(self):
        return self._miss_count

    def get_available_space(self):
        return self.spaceLeft

    def get_replace_poll(self):
        return self._replace_pol

    def reset_shadow_cache(self):
        self.shadow.clear()

    def print_cache(self):
        print(self.cache)

    def get_l2_address(self, key):
        if (self._hash_type == Cache.consistent):
            return self.hash_ring.get_node(key)
        elif (self._hash_type == Cache.rendezvous):
            return self.hash_ring.find_node(key)
        elif (self._hash_type == Cache.rr):
            val = key.split("_")[1]
            res = int(val) % int(self.hash_ring)
            return res
class RewardNet():
    """Interacts with and learns from the environment."""
    def __init__(self, state_action_size, reward_size):
        """Initialize an RewardNet object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
        """
        self.state_action_size = state_action_size
        self.reward_size = reward_size
        set_seed()

        # Reward-Network
        self.reward_net = Network(state_action_size, reward_size).to(device)
        self.optimizer = optim.Adam(self.reward_net.parameters(), lr=LR)
        self.criterion = nn.MSELoss()

        # Replay memory
        self.memory = ReplayBuffer(BUFFER_SIZE, BATCH_SIZE, 0)
        # Reward dict - LRFU implementation not found, therefore just LRU
        self.M = LRU(BUFFER_SIZE)
        self.S = []
        self.V = 0
        # Initialize loss for tracking the progress
        self.loss = 0

    def add(self, state_action, reward):
        # Save experience in replay memory
        self.memory.add(state_action, reward)

    def add_to_M(self, sa, reward):
        # Add records to the reward dict
        self.M[sa] = reward
        if len(self.M) >= BUFFER_SIZE:
            del self.M[self.M.peek_last_item()[0]]  # discard LRU key

    def get_from_M(self, sa):
        # Retrieve items from M
        return (self.M.get(sa, 0))

    def step(self):
        # If enough samples are available in memory, get random subset and learn
        if len(self.memory) > BATCH_SIZE:
            experiences = self.memory.sample()
            self.learn(experiences)

    def act(self, state_action):
        """Returns actions for given state as per current policy.

            state (array_like): current state
        """
        sa = torch.from_numpy(state_action).float().unsqueeze(0).to(device)

        return (self.reward_net(sa))

    def learn(self, experiences):
        """Update value parameters using given batch of experience tuples.

            experiences (Tuple[torch.Tensor]): tuple of (sa, r) tuples 
        """
        state_actions, rewards = experiences

        # Get expected Reward values
        R_pred = self.reward_net(state_actions)

        # Compute loss
        loss = self.criterion(R_pred, rewards)
        print("RewardNet loss = {}".format(loss))
        # Grad descent
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        # Keep track of the loss for the history
        self.loss = loss.item()
class DatabaseEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, args={}):
        super(DatabaseEnv, self).__init__()

        # Number of actions that the database can take
        # { Create View, Do nothing }
        N_DISCRETE_ACTIONS = 2

        # Number of tables in the database being considered
        N_TABLES = 21
        N_JOIN_COMBINATIONS = int((N_TABLES * (N_TABLES - 1)) / 2)

        self.database = Database()
        self.table_names = self.database.get_table_names_from_hive()
        self.join_name_mappings = self.get_mapping_for_tables(self.table_names)

        # Maximum number of steps in an episode
        N_MAX_STEPS = 5
        N_MAX_JOINS = 2

        # Define action and observation space
        # They must be gym.spaces objects
        self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
        self.observation_space = spaces.Box(low=0,
                                            high=1,
                                            shape=(N_JOIN_COMBINATIONS, ),
                                            dtype=np.uint8)

        # Capture information about episode to replay the same
        # on the real database
        self.max_steps = N_MAX_STEPS
        self.history = self.reset_env_history()
        self.current_step = 0
        self.current_views = []
        self.candidate_cost = 100
        exclusion_list = ['schema.sql', 'fkindexes.sql']
        self.queries = self.get_queries_from_dataset(
            '/home/richhiey/Desktop/workspace/dbse_project/Self-Driving-Materialized-Views/project/data/JOB',
            exclusion_list)
        pickle_file_path = '/home/richhiey/Desktop/workspace/dbse_project/Self-Driving-Materialized-Views/project/data/JOB/processed/job_processed.pickle'
        self.candidates = self.get_candidates_for_dataset(pickle_file_path)
        self.workload_distribution = self.get_workload_distribution(
            self.queries)
        self.current_candidate_queue = deque()
        self._obs_space = np.zeros(N_JOIN_COMBINATIONS)
        self._current_action = np.zeros(N_JOIN_COMBINATIONS)
        self.lru_cache_size = 20
        self.lru_cache = LRU(self.lru_cache_size)

    def get_mapping_for_tables(self, table_names):
        mapping = {}
        names = []
        for name in table_names:
            name = name[0]
            print(name)
            names.append(name)
        self.table_names = names
        num = 0
        for i in range(len(names)):
            for j in range(i + 1, len(names)):
                join_name = names[i] + '-' + names[j]
                num = num + 1
                mapping[num] = join_name
        print(mapping)
        return mapping

    def reset_env_history(self):
        history = {}
        for i in range(1, self.max_steps):
            history[i] = {'actions': [], 'query': ''}
        return history

    def get_workload_distribution(self, queries):
        # An array of the index value for weighting
        i = np.arange(len(queries))
        # Higher weights for larger index values
        w = np.exp(i / 10.)
        # Weight must be normalized
        w /= w.sum()
        return w

    def get_candidates_for_query(self, query):
        return self.candidates['data/JOB/' + query]

    def get_candidates_for_dataset(self, pickle_file_path):
        with open(pickle_file_path, 'rb') as pickle_file:
            candidates = pickle.load(pickle_file)
        new_candidates = {}
        for candidate in candidates:
            for key, value in candidate.items():
                new_candidates[key] = value
        return new_candidates

    def get_queries_from_dataset(self, dataset_path, exclusion_list):
        queries = []
        for root, dirs, files in os.walk(dataset_path):
            for file in files:
                if file in exclusion_list:
                    continue
                if '.sql' in file:
                    queries.append(file)
        return queries

    def step(self, action):
        # Use the action predicted by agent to modify the
        # database environment and calculate reward of the action
        delay_modifier = (self.current_step / self.max_steps)
        # print(self._obs_space)
        print(self.current_step)
        if not self.current_candidate_queue:
            self.current_step = self.current_step + 1
            self.selected_query = np.random.choice(
                self.queries, size=1, p=self.workload_distribution)[0]
            self.history[self.current_step]['query'] = self.selected_query
            candidates = self.get_candidates_for_query(self.selected_query)
            print(self.selected_query)
            for candidate in candidates:
                candidate = candidate.flatten()
                self.current_candidate_queue.append(candidate)

        current_candidate = self.current_candidate_queue.popleft()
        print('Action - ' + str(action))
        cand_idx = np.where(current_candidate == 1)[0]
        print('Candidate - ' + self.join_name_mappings[int(cand_idx)])
        self.lru_cache[self.selected_query] = current_candidate

        # Log some info about this training step
        self.history[self.current_step]['actions'].append({
            'action': action,
            'candidate': current_candidate,
            'obs_space': self._obs_space,
            'eviction': self.lru_cache.peek_last_item(),
        })

        reward, eviction = self._take_action(action, current_candidate,
                                             delay_modifier)
        print('Reward - ' + str(reward))
        # The episode only terminates once the step limit has been reached and
        # candidates are still queued; otherwise it keeps running
        done = self.current_step >= self.max_steps

        if done and len(self.current_candidate_queue):
            reward = get_final_reward_for_episode()
        else:
            done = False

        obs = self._next_observation()

        return obs, reward, done, self.history

    # Reset the state of the environment to an initial state
    def reset(self):
        self.history = self.reset_env_history()
        self.current_step = 0
        self.current_views = []
        self.candidate_cost = 100
        self._obs_space = np.zeros(N_JOIN_COMBINATIONS)
        self._current_action = np.zeros(N_JOIN_COMBINATIONS)
        self.lru_cache = LRU(self.lru_cache_size)
        return self._next_observation()

    def render(self, mode='human', close=False):
        pass

    def _next_observation(self):
        return self._obs_space

    def env_cost_of_episode(self):
        run_time = 0
        for step, step_history in self.history.items():
            print('------------ Step - ' + str(step) + ' -------------')
            # First run the query and check the base cost
            query = step_history['query']
            if not query:
                # Skip steps that never received a query in this episode
                continue
            print(query)
            with open(
                    os.path.join(
                        '/home/richhiey/Desktop/workspace/dbse_project/Self-Driving-Materialized-Views/project/data/JOB/',
                        query), 'r') as f:
                query_str = f.read()
                start_time = time.time()
                print('Actually executing on database now ..')
                query_output = self.database.execute_query(query_str)
                total_time = time.time() - start_time
                print('Time taken - ' + str(total_time))
                run_time = run_time + total_time
                print('Execution done!')

            def get_view_creation_query(tbl_1, tbl_2):
                view_name = str(tbl_1) + '_' + str(tbl_2)
                # No join predicate is given, so the view materializes the
                # plain join of the two tables
                return ('CREATE VIEW IF NOT EXISTS ' + view_name +
                        ' AS SELECT * FROM ' + str(tbl_1) + ' JOIN ' +
                        str(tbl_2) + ';')

            # Then run through the history and get costs for the actions
            # taken by the agent
            if len(step_history['actions']) > 0:
                for action_record in step_history['actions']:
                    if action_record['action']:
                        idx = np.where(action_record['candidate'] == 1)[0]
                        print(idx)
                        temp = self.join_table_mapping[int(idx)].split('-')
                        table_1 = temp[0]
                        table_2 = temp[1]
                        query_str = get_view_creation_query(table_1, table_2)
                        start_time = time.time()
                        query_output = self.database.execute_query(query_str)
                        total_time = time.time() - start_time
                        print('View Creation Time taken - ' + str(total_time))
                        run_time = run_time + total_time
            print('Total runtime - ' + str(run_time))
            print('---------------------------------------------------')
        return run_time

    def hawc_cost_for_episode(self):
        # Placeholder: a random stand-in cost
        return np.random.randint(0, 100)

    def calculate_reward_for_episode(self):
        initial_reward = 20
        env_reward = self.env_cost_of_episode()
        print(env_reward)
        hawc_reward = self.hawc_cost_for_episode()
        return ((env_reward - initial_reward) /
                (hawc_reward - initial_reward)) * 1000

    def _take_action(self, action, candidate, delay_modifier):
        # Add the created view to the obs space
        # self._obs_space = np.add(self._obs_space, candidate)
        if self.current_step < self.max_steps - 1:
            # Intermediate steps: small positive reward for materializing a
            # view, nothing otherwise
            reward = 1 if action else 0
        else:
            # Final step: derive the reward from the query costs of the
            # whole episode
            reward = self.calculate_reward_for_episode()
        return reward, False
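
A minimal sketch of the lru-dict behaviour this environment leans on (independent of the project above, and assuming only the same "from lru import LRU" import used here): assignments promote a key to the most-recently-used slot, peek_last_item() exposes the least-recently-used pair without touching its recency, and the oldest entry is dropped automatically once the capacity is exceeded.

from lru import LRU

cache = LRU(2)                    # capacity of two entries
cache['q1'] = 'view-a'
cache['q2'] = 'view-b'
print(cache.peek_last_item())     # ('q1', 'view-a') - the least recently used pair
_ = cache['q1']                   # reading q1 makes q2 the least recently used entry
cache['q3'] = 'view-c'            # exceeds the capacity, so q2 is evicted silently
print(cache.keys())               # ['q3', 'q1'] - ordered from most to least recent

In the environment above, peek_last_item() is only used to log which entry would be evicted next; passing an eviction callback to LRU(), as other examples on this page do, would be one way to react to the eviction itself.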
예제 #14
0
class Streamer:
    """ streamer for flows management """
    num_streamers = 0

    def __init__(self,
                 source=None,
                 capacity=128000,
                 active_timeout=120,
                 inactive_timeout=60,
                 user_metrics=None,
                 user_classifiers=None,
                 enable_ndpi=True):

        Streamer.num_streamers += 1
        self.__exports = []
        self.source = source
        self.__flows = LRU(capacity, callback=emergency_callback)  # LRU cache
        self._capacity = self.__flows.get_size()  # Streamer capacity (default: 128000)
        self.active_timeout = active_timeout  # expiration active timeout
        self.inactive_timeout = inactive_timeout  # expiration inactive timeout
        self.current_flows = 0  # counter for stored flows
        self.flows_number = 0
        self.current_tick = 0  # current timestamp
        self.processed_packets = 0  # total processed packets counter
        # Python dictionaries to hold current and archived flow records
        self.flow_cache = OrderedDict()
        self.user_classifiers = {}
        if user_classifiers is not None:
            try:
                classifier_iterator = iter(user_classifiers)
                for classifier in classifier_iterator:
                    if isinstance(classifier, NFStreamClassifier):
                        self.user_classifiers[classifier.name] = classifier
            except TypeError:
                self.user_classifiers[user_classifiers.name] = user_classifiers
        self.user_metrics = {}
        if enable_ndpi:
            ndpi_classifier = NDPIClassifier('ndpi')
            self.user_classifiers[ndpi_classifier.name] = ndpi_classifier
        if user_metrics is not None:
            self.user_metrics = user_metrics

    def _get_capacity(self):
        """ getter for capacity attribute """
        return self.__flows.get_size()

    def _set_capacity(self, new_size):
        """ setter for capacity size attribute """
        return self.__flows.set_size(new_size)

    capacity = property(_get_capacity, _set_capacity)
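
    # For illustration (assuming the lru-dict API): reading streamer.capacity goes
    # through LRU.get_size(), and assigning streamer.capacity = 256000 calls
    # LRU.set_size(), which resizes the cache in place and drops the least
    # recently used flows if the new size is smaller than the current one.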

    def terminate(self):
        """ terminate all entries in Streamer """
        remaining_flows = True
        while remaining_flows:
            try:
                # peek_last_item() returns None once the LRU is empty, so the
                # tuple unpacking below raises TypeError and ends the loop
                key, value = self.__flows.peek_last_item()
                value.export_reason = 2
                self.exporter(value)
            except TypeError:
                remaining_flows = False

        for classifier_name, classifier in self.user_classifiers.items():
            self.user_classifiers[classifier_name].on_exit()

    def exporter(self, flow):
        """ export method for a flow trigger_type:0(inactive), 1(active), 2(flush) """
        # Look for the flow in the created classifiers
        for classifier_name, classifier in self.user_classifiers.items():
            # Terminate the flow in the respective classifiers
            self.user_classifiers[classifier_name].on_flow_terminate(flow)
        # Delete the flow register from the active flows collection
        del self.__flows[flow.key]
        # Decrease the number of active flows by 1
        self.current_flows -= 1
        # Add the expired flow register to the final flows collection
        self.__exports.append(flow)

    def inactive_watcher(self):
        """ inactive expiration management """
        remaining_inactives = True
        # While there are inactive flow registers
        while remaining_inactives:
            try:
                # Peek at the least recently used (LRU) flow register without
                # updating its recency
                key, value = self.__flows.peek_last_item()
                # Has the flow exceeded the inactive timeout (1 minute)?
                if (self.current_tick -
                        value.end_time) >= (self.inactive_timeout * 1000):
                    # Set export reason to 0 (inactive) in the flow
                    value.export_reason = 0
                    # Export the flow to the final flows collection
                    self.exporter(value)
                # There are no flows that can be declared inactive yet
                else:
                    # Stop the inactive watcher until it is called again
                    remaining_inactives = False
            except TypeError:
                remaining_inactives = False

    def consume(self, pkt_info):
        """ consume a packet and update Streamer status """
        self.processed_packets += 1  # increment total processed packet counter
        # Obtain a flow hash key for identification of the flow
        key = get_flow_key(pkt_info)
        print("\nCONSUMING PACKET FROM FLOW:", key)
        # Is this packet from a registered flow?
        if key in self.__flows:
            print("FLOW FOUND - UPDATING STATISTICS")
            # Checking current status of the flow that the packet belongs to
            # -1 active flow - 0 inactive flow - 1 active flow timeout expired - 2 flush remaining flows in LRU
            # 3 FIN flag detected - 4 RST flag detected
            flow_status = self.__flows[key].update_and_check_flow_status(
                pkt_info, self.active_timeout, self.user_classifiers,
                self.user_metrics)

            # Has the active timeout of the flow register expired (2 minutes)?
            if flow_status == 1:
                # Export the old flow register to the final collection and terminate this flow process on the specified classifier
                self.exporter(self.__flows[key])
                # Create a new flow register for the current packet
                flow = Flow(pkt_info, self.user_classifiers, self.user_metrics,
                            self.flow_cache)
                # Add the new flow to the active flows collection using the same Hash key
                self.__flows[flow.key] = flow
                # Reset the flow_cache entry for this flow key
                del self.flow_cache[flow.key]
                self.flow_cache[flow.key] = {}
                # Update the flow status on the collection
                flow.create_new_flow_record(pkt_info, self.user_classifiers,
                                            self.user_metrics)
            if flow_status == 3:
                # FIN FLAG DETECTED IN BOTH DIRECTIONS - EXPORTING FLOW
                self.exporter(self.__flows[key])
            if flow_status == 4:
                # RST FLAG FOUND - UPDATING BIDIRECTIONAL STATISTICS - EXPORTING FLOW
                self.exporter(self.__flows[key])
            if flow_status == 5:  # FIN FLAG TIMER EXPIRED
                self.exporter(self.__flows[key])
                print("****FLOW EXPORTED")
                """
                expired_flow = self.__flows[key]
                print("****STARTING TCP TIMER")
                threading.Timer(20, self.export_incomplete_flow(expired_flow))
                """

        # This packet belongs to a new flow
        else:
            # Increase the count of current active flows
            print("FLOW NOT FOUND - CREATING NEW FLOW REGISTER")
            # Update flow counters
            self.current_flows += 1
            self.flows_number += 1
            # Create the new flow object
            flow = Flow(pkt_info, self.user_classifiers, self.user_metrics,
                        self.flow_cache)
            # Add this new flow register to the LRU
            self.__flows[flow.key] = flow
            # Create the entry on the flow_cache with the flow key
            self.flow_cache[flow.key] = {}
            # Create the new bidirectional flow record
            flow.create_new_flow_record(pkt_info, self.user_classifiers,
                                        self.user_metrics)
            # Set the current start time on the streamer timer to keep control of the inactive flows
            self.current_tick = flow.start_time
            # Remove the Least Recently Used (LRU) flow record from the active flows collection
            # and export it to the final flows collection if its inactive timeout has been exceeded
            self.inactive_watcher()
        print(
            "*******************PACKET CONSUMED - MOVING TO NEXT*********************************"
        )

    """
    def export_incomplete_flow(self, expired_flow):
        print("##############################---TCP TIMER EXPIRED--#######################")
        # Look for the flow in the created classifiers
        self.flows_number += 1
        for classifier_name, classifier in self.user_classifiers.items():
            # Terminate the flow in the respective classifiers
            self.user_classifiers[classifier_name].on_flow_terminate(expired_flow)
        self.__exports.append(expired_flow)
        print("##############################---EXPIRED FLOW EXPORTED-----###############################")
    """

    def __iter__(self):
        # Create the packet information generator
        pkt_info_gen = Observer(source=self.source)
        # Extract each packet information from the network interface or pcap file
        for pkt_info in pkt_info_gen:
            if pkt_info is not None:
                # Check if the packet belongs to an existent flow or create a new one
                self.consume(pkt_info)
                for export in self.__exports:
                    yield export
                self.__exports = []
        # Terminate the streamer
        self.terminate()
        for export in self.__exports:
            yield export
        self.__exports = []
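
As a rough, self-contained illustration of the expiration pattern the Streamer builds on (only the lru-dict package is assumed; the timestamps, the touch helper and the export callable below are made up for the sketch): every new or updated flow is written into the LRU, which promotes it to the front, while the watcher repeatedly inspects peek_last_item() and removes entries from the tail until it reaches one that is still within the inactive timeout.

from lru import LRU

def on_overflow(key, value):
    # Stand-in for emergency_callback: invoked when the LRU evicts due to capacity
    print('capacity eviction:', key)

flows = LRU(128000, callback=on_overflow)
INACTIVE_TIMEOUT_MS = 60 * 1000

def touch(key, last_seen_ms):
    # Creating or updating a flow record promotes it to the most-recently-used slot
    flows[key] = {'end_time': last_seen_ms}

def expire_inactive(current_tick_ms, export):
    # Walk from the least-recently-used end and stop at the first fresh flow;
    # everything in front of it was touched more recently
    while len(flows) > 0:
        key, value = flows.peek_last_item()
        if current_tick_ms - value['end_time'] < INACTIVE_TIMEOUT_MS:
            break
        export(key, value)
        del flows[key]

touch('flow-a', 1000)
touch('flow-b', 80000)
expire_inactive(130000, lambda k, v: print('expired:', k))   # expires flow-a only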