def request_alg_run_plot_data(trace_name, algorithm, cache_size, plot, minio_get):
    """Fetch the stored plot arrays for one algorithm run from minio.

    Looks up the run document in mongo, derives the minio object id from the
    document's _id plus the plot name, downloads the .npz payload into a
    temporary file and returns the loaded numpy archive, or False on failure.
    """
    run_doc = storage.find_trace_run(trace_name, algorithm, cache_size)
    run_id = run_doc.get("_id")
    run_cache_size = run_doc.get("cache_size")
    run_algorithm = run_doc.get("algorithm")
    # Human-readable name used only for the success log line.
    header = ("./graph data/" + trace_name + "_" + str(run_cache_size) + "_" +
              str(run_algorithm) + "_" + plot + '.npz')
    minio_id = str(run_id) + plot
    tmp = tempfile.NamedTemporaryFile(suffix='.npz')
    success = minio_get.retrieve(tmp.name, minio_id)
    if not success:
        pretty.failure("plot data not loaded succesfully")
        return success
    pretty.success(header + ' downloaded succesfully and ready for plotting')
    return np.load(tmp.name, allow_pickle=True)
def get_over_time_list(trace_name, minio_get, app_id=None):
    """Return the normalized real-time array for a trace.

    Looks the trace up in mongo, downloads its '-time' npz payload from minio
    into a temporary file and returns normalize_real_time() applied to the
    stored array.  If the payload is missing from minio, the trace data is
    (re)generated via sg.sf_singleTraceGen() and the download retried once.
    Returns False when the trace is unknown or the data cannot be retrieved.
    """
    print("in get over time list")
    trace = storage.find_trace(trace_name)
    print(trace_name)
    print(trace)
    if trace is None:  # was `== None`; identity test is the correct idiom
        print(trace_name, "could not be found in mongodb")
        return False
    trace_id = trace.get("_id")
    pretty.utility("Mongo contacted.. retrieving over time array for " +
                   trace_name)
    tmp = tempfile.NamedTemporaryFile(suffix='.npz')
    minio_id_time = str(trace_id) + '-time'
    if minio_get.retrieve(tmp.name, minio_id_time):
        return _load_over_time(tmp.name)
    # Payload missing from minio: regenerate the trace data, then retry once.
    sg.sf_singleTraceGen(trace_name, app_id, minio_only=True)
    if minio_get.retrieve(tmp.name, minio_id_time):
        return _load_over_time(tmp.name)
    return False


def _load_over_time(path):
    """Load the single array stored in an npz file and normalize it.

    npz archives key their contents; an unnamed single array is always saved
    under 'arr_0'.  The trailing [()] unwraps the 0-dimensional object array
    numpy returns for pickled payloads.
    """
    data_time = np.load(path, allow_pickle=True)['arr_0'][()]
    over_time = normalize_real_time(data_time)
    pretty.success("Mongo retrieval successful")
    return over_time
def store_flag(flag, param_list):
    """Record a plot flag and its parameters in the plots_tracked collection.

    Inserts a new document when the flag is not yet tracked; otherwise just
    reports that it is already present.
    """
    plots = db.plots_tracked
    plots_obj = plots.find_one({'flag': flag})
    if plots_obj is None:  # was `== None`
        pretty.utility("plot not found, adding to mongo")
        plots.insert_one({'flag': flag, 'params': param_list})
    else:
        # NOTE(review): every other pretty.success call in this file passes a
        # single string; this three-argument call only works if the pretty
        # module accepts *args — confirm against its definition.
        pretty.success(flag, param_list, "already present")
def retrieve(self, file, id):
    """Download object `id` from the 'visual-data' bucket into local `file`.

    Returns True on success; False when the object is missing (ClientError)
    or the minio endpoint cannot be reached (EndpointConnectionError).
    """
    pretty.utility("Minio contacted >>>")
    try:
        bucket = self.s3.Bucket('visual-data')
        bucket.download_file(id, file)  # attempt the download
    except ClientError:
        # Object does not exist in the bucket.
        pretty.failure("File does not exist")
        return False
    except EndpointConnectionError:
        # Minio server unreachable.
        pretty.failure("Connection was not established")
        return False
    pretty.success("Minio retrieval successful <<< ")
    return True
def insert(self, file, id):
    """Upload local `file` to the 'visual-data' bucket under object `id`.

    Returns True on success; False when the local file is missing
    (ClientError) or the minio endpoint cannot be reached
    (EndpointConnectionError).
    """
    pretty.contact("IN MINIO INSERT METHOD --- CURRENTLY INSERTING " + file)
    try:
        self.s3.Bucket('visual-data').upload_file(file, id)  # attempts to upload
    except ClientError:
        pretty.failure("File does not exist on your pc")  # message if file error
        return False
    except EndpointConnectionError as err:
        # Bug fix: the original printed the exception *class*; print the
        # caught instance, which carries the actual connection details.
        print(err)
        pretty.failure("Connection was not established")  # message if connection error
        return False
    pretty.success(file + " uploaded")
    return True
def s_minio_confirm(traceId, traceName):
    """Verify that every per-trace array made it into minio.

    Checks the histogram, request, reuse-distance and real-time objects for
    the trace and stamps each confirmation in mongo via
    storage.workload_confirmed().  Returns True only when all four exist;
    returns False on the first missing object.
    """
    toMinio = Minio_Module.Minio_Module()
    hId = traceId + '-histogram'
    rqId = traceId + '-rq'
    reuId = traceId + '-reuse'
    timeId = traceId + '-time'
    # The histogram object backs the request-frequency plot.
    if toMinio.exist(hId):
        storage.workload_confirmed(traceName, requestFrequency=1)
    else:
        pretty.failure("Request frequency upload for " + traceName +
                       " to minio failed")
        return False
    if toMinio.exist(rqId):
        storage.workload_confirmed(traceName, request=1)
    else:
        # Bug fix: this message previously duplicated the request-frequency
        # text, making a missing request array indistinguishable in the logs.
        pretty.failure("Request array upload for " + traceName +
                       " to minio failed")
        return False
    if toMinio.exist(reuId):
        storage.workload_confirmed(traceName, reuseDistance=1)
    else:
        pretty.failure("Reuse distance upload for " + traceName +
                       " to minio failed")
        return False
    if toMinio.exist(timeId):
        storage.workload_confirmed(traceName, time=1)
    else:
        pretty.failure("Real time array upload for " + traceName +
                       " to minio failed")
        return False
    # Bug fix: added the missing spaces around the trace name.
    pretty.success("s_minio_confirm " + traceName +
                   " confirmed uploaded to database")
    return True
def workload_confirmed(traceName, request=None, requestFrequency=None,
                       reuseDistance=None, time=None):
    """Stamp minio-upload confirmations on a workload document.

    For each flag that is not None, logs a success message and records the
    current time under the matching 'uploaded to minio' field of the trace's
    workload document.
    """
    # Bug fix: spaces were missing around the trace name in this message.
    pretty.utility("CONFIRMING THAT " + traceName +
                   " DATA WAS UPLOADED TO MINIO")
    traces = db.workload

    def _stamp(message, field):
        # Log and record one confirmation timestamp on the workload document.
        pretty.success(message)
        traces.update_one({'trace name': traceName},
                          {'$set': {field: datetime.now()}})

    if request is not None:
        _stamp("REQUEST ARRAY CONFIRMED UPLOADED TO MINIO",
               'requests uploaded to minio')
    if requestFrequency is not None:
        _stamp("REQUEST FREQUENCY ARRAY CONFIRMED UPLOADED TO MINIO",
               'request frequency uploaded to minio')
    if reuseDistance is not None:
        _stamp("REUSE DISTANCE ARRAY CONFIRMED UPLOADED TO MINIO",
               'reuse distance uploaded to minio')
    if time is not None:
        # NOTE(review): this field name is uppercase unlike its siblings;
        # kept as-is because existing documents already use this key.
        _stamp("REAL TIME ARRAY CONFIRMED UPLOADED TO MINIO",
               'REAL TIME uploaded to minio')
def sf_singleTraceGen(trace, app_id=None, minio_only=False):
    # Process one trace end-to-end: pull the raw trace from the sftp server
    # (cached in the module-level sftp_data dict), build the request list,
    # access-frequency histogram and reuse-distance map in a single pass,
    # send everything to mongo/minio via serv.s_to_database(), then confirm
    # the minio uploads.  Returns the mongo trace id string on success,
    # False otherwise.  `app_id` keys progress messages in
    # conduit.current_status; `minio_only` is forwarded to s_to_database.
    print("IN SINGLE TRACE GEN")
    print(trace)
    #traces = generateTraceNames(trace)
    traces = trace  # NOTE(review): unused after assignment
    print(trace)
    traceplot = {}  # NOTE(review): never populated or read
    print('single trace gen:')
    print("\nTRACE NAME ================================== " + trace)
    if app_id != None:
        conduit.current_status[
            app_id] = "getting " + trace + ' from sftp server... please wait'
    # Fetch the trace data, reusing the cached copy when available.
    if sftp_data.get(trace) == None:
        print("getting data from sftp server...")
        filename, trace_data = sftp_r.direct_query(trace)
        sftp_data[trace] = [filename, trace_data]
        print("finished getting data from sftp server...")
        print(filename)
    else:
        filename = sftp_data[trace][0]
        trace_data = sftp_data[trace][1]
        pretty.success("*\n*\n*\n*\n*\n*\n*\nREUSE")
    if app_id != None:
        conduit.current_status[
            app_id] = 'Processing trace data for ' + trace + '... please wait this can take over 10 minutes'
    # this is new ----------
    reader = sf_filereader.identify_trace(filename, trace_data)
    count = 0        # running request counter (1-based position in the trace)
    accesses = {}    # raw LBA -> compact integer id, in first-seen order
    req = []         # sequence of compact ids, one per request
    num = 0          # next compact id to assign
    #keys in this dicitonary are the sanitized LBAs and the values are number of access
    reqFreq = []     # reqFreq[id] = number of times that LBA has been requested so far
    histogram = {}   # access-count -> number of LBAs currently at that count
    reuse_distance = {}
    timestamps = []  # NOTE(review): never populated or read
    max_accesses = 0
    max_reused = 0
    #reuse_distance is a list of lists where the key is the lba and the value is the list of when that lba is called.
    #we use this list later on to calculate reuse distance frequency distribution.
    for lba in reader.read_all():  # instead of reader.read()
        count += 1
        if lba not in accesses.keys():
            # First time this LBA is seen: assign it the next compact id.
            accesses[lba] = num
            reqFreq.append(0)  #now a list
            num += 1
            reuse_distance[accesses[lba]] = [count]
        else:
            reuse_distance[accesses[lba]].append(count)
        #we are checking for max reused to determine the size of the resulting array
        req.append(accesses[lba])
        reqFreq[accesses[lba]] += 1
        times_accessed = reqFreq[accesses[lba]]
        if max_reused < len(reuse_distance[accesses[lba]]):
            max_reused = len(reuse_distance[accesses[lba]])
        #this tells us upper bound for access frequency's x axis
        if times_accessed > max_accesses:
            max_accesses = times_accessed
        #the keys in histogram are the number of times a reuse distance has been counted
        #the reqFreq array keeps track of individual LBAs and how many times they have been called so far
        if histogram.get(times_accessed) == None:
            histogram[times_accessed] = 1
        else:
            histogram[times_accessed] += 1
        # The LBA moved from bucket (n-1) to bucket n, so decrement the old
        # bucket to keep the histogram a distribution over *current* counts.
        if times_accessed > 1:
            histogram[times_accessed - 1] -= 1
    histogram[0] = max_accesses
    reuse_distance['max_reused'] = max_reused
    entry = {}  # NOTE(review): built but only ever used by the commented print below
    curdata = {
        'number of requests': reader.requests,
        'number of unique requests': reader.uniques,
        'number of reuses': reader.reuses,
        'number of writes': reader.writes,
        # NOTE(review): stores the request count under 'time' — confirm intended.
        'time': reader.requests,
    }
    entry[trace] = curdata
    #print(str(curdata))
    # ============================================ writes check - remove
    #requestFrequencyOutput = trace + "_REQUEST_FREQ"
    #requestsOutput = trace + "_REQUEST"
    #np.savez_compressed(requestFrequencyOutput, reqFreq)
    #np.savez_compressed(requestsOutput, req)
    # np.savez_compressed(trace, req, regFreq, misses, hitPerc, ios, polOT, missRate)
    print("TRACE NAME ------------------------- :", trace)
    if app_id != None:
        conduit.current_status[
            app_id] = trace + ' data processed, uploading to server... please wait'
    #this is where the data gets sent to mongodb
    mongo_trace_id = serv.s_to_database(trace, reader.requests,
                                        reader.uniques, reader.reuses,
                                        reader.writes, reader.requests, req,
                                        histogram, reuse_distance,
                                        reader.time_stamp, minio_only)
    # Re-fetch the id from storage; this overwrites s_to_database's return.
    mongo_trace_id = str(storage.find_id(trace))
    confirmed = serv.s_minio_confirm(mongo_trace_id, trace)
    if confirmed:
        return mongo_trace_id
    else:
        return False
def getAlgStats(algname, cache_size, config, trace_name, app_id=None,
                minio_only=False):
    """Run one cache algorithm over a trace and persist its statistics.

    Pulls the trace from the sftp server (cached in the module-level
    sftp_data dict), runs the algorithm via readerLoop(), stores the
    aggregate results and per-request arrays in mongo/minio through
    serv.as_to_Database(), then confirms the minio uploads.

    Returns the mongo trace-runs id on success, -1 otherwise.
    """
    print("in get alg stats")
    print("==============================================")
    alg_args = parseConfig(config, algname)
    misses = 0
    ios = 1
    # general alg data
    lba_map = {}
    hitPerc = []    # percentage of hits over time
    missRate = []   # miss rate over time
    timestamps = []
    polOT = []      # pollution over time
    # special alg data: depending on which algorithm is run, extra
    # per-request series are filled in by readerLoop.
    dlirs_HIR_s = []
    lecar_lru = []
    arc_p = []
    cacheus_learning_rate = []
    cacheus_lru = []
    cacheus_q = []  # ------------ plot for hit rate over learning rate
    # evictions data
    # NOTE(review): these counters are never updated anywhere in this
    # function, so finalEvictionR/finalEvictionT are always 0 — confirm
    # whether readerLoop was meant to fill them in.
    all_evictions = 0   # total number of evictions @ point in time
    evictions = []      # number of evictions per requested LBA -SH
    if app_id is not None:
        conduit.current_status[app_id] = "getting data from sftp server..."
    # Fetch the trace, reusing the cached copy when available.
    # (Bug fix: the original re-queried the sftp server unconditionally
    # after this if/else, which defeated the sftp_data cache.)
    if sftp_data.get(trace_name) is None:
        print("getting data from sftp server...")
        filename, trace_data = sftp_r.direct_query(trace_name)
        sftp_data[trace_name] = [filename, trace_data]
        print("finished getting data from sftp server...")
        print(filename)
    else:
        filename = sftp_data[trace_name][0]
        trace_data = sftp_data[trace_name][1]
        pretty.success("*\n*\n*\n*\n*\n*\n*\nREUSE")
    # this is new ----------
    reader = sf_filereader.identify_trace(filename, trace_data)
    print(filename, " ***************************** ********************** ")
    print(algname)
    alg = get_algorithm(algname)(cache_size, **alg_args)
    print(alg)
    timestamps.append(0)  # time origin
    trace = storage.find_trace(trace_name)
    trace_requests = trace.get("requests")
    if app_id is not None:
        conduit.current_status[
            app_id] = "processing " + trace_name + algname + str(
                cache_size) + " this can take over 10 minutes..."
    ios, misses, hitPerc, missRate, polOT, dlirs_HIR_s, arc_p, cacheus_learning_rate, cacheus_lru, cacheus_q, lecar_lru = readerLoop(
        reader, alg, hitPerc, missRate, polOT, algname, lba_map,
        trace_requests)
    # NOTE(review): lecarLR is computed but never used afterwards — kept for
    # parity with the original; confirm whether it should be persisted.
    lecarLR = alg.learning_rate if algname.lower() == 'lecar' else -1
    avg_pollution = np.mean(alg.pollution.Y)
    ios = reader.requests
    hits = ios - misses
    hitRate = round(100 * (hits / ios), 2)
    finalEvictionR = round(100 * (all_evictions / ios))  # final eviction rate
    finalEvictionT = len(evictions)
    print(
        "\nResults: {:<10} size={:<8} hits={}, ios = {}, misses={}, hitrate={:4}% avg_pollution={:4}% {}"
        .format(algname, cache_size, hits, ios, misses, hitRate,
                round(avg_pollution, 2), trace_name, *alg_args.items()))
    print(dlirs_HIR_s, lecar_lru, arc_p, cacheus_learning_rate, cacheus_lru,
          cacheus_q)
    mongo_trace_runs_id = serv.as_to_Database(
        trace_name, algname, cache_size, ios, hits, hitRate, misses,
        round(avg_pollution, 2), finalEvictionR, finalEvictionT, hitPerc,
        missRate, polOT, minio_only, dlirs_HIR_s, lecar_lru, arc_p,
        cacheus_learning_rate, cacheus_lru, cacheus_q)
    if app_id is not None:
        conduit.current_status[
            app_id] = "processing " + trace_name + algname + str(
                cache_size) + " complete"
    print("---------------------MONGO TRACE RUNS ID BEING SENT TO CONFIRM",
          mongo_trace_runs_id)
    confirmed = serv.as_minio_confirm(mongo_trace_runs_id, trace_name,
                                      algname, cache_size)
    if confirmed:
        return mongo_trace_runs_id
    else:
        return -1
def trace_run_confirmed(traceId, hit_rate=None, miss_rate=None,
                        pollution=None, dlirs=None, lecar=None, arc=None,
                        cacheus=None):
    """Stamp minio-upload confirmations on a trace_runs document.

    For each flag that is not None, logs a success message and records the
    current time under the matching field(s) of the run identified by
    traceId.  The cacheus flag stamps three fields at once.
    """
    alg_stats = db.trace_runs
    pretty.utility("CONFIRMING THAT ALGORITHM DATA WAS UPLOADED TO MINIO")
    print(traceId)

    def _stamp(field):
        # Record one confirmation timestamp on the run document.
        alg_stats.update_one({'_id': ObjectId(traceId)},
                             {'$set': {field: datetime.now()}})

    # (flag, success message, document fields to stamp) — messages and field
    # names preserved exactly from the original branch-per-flag version.
    confirmations = [
        (hit_rate, "HIT RATE ARRAY CONFIRMED UPLOADED TO MINIO",
         ['hit rate array uploaded to minio']),
        (miss_rate, "MISS RATE ARRAY CONFIRMED UPLOADED TO MINIO",
         ['miss rate array uploaded to minio']),
        (pollution, "POLLUTION ARRAY CONFIRMED UPLOADED TO MINIO",
         ['pollution array uploaded to minio']),
        (dlirs, "DLIRS STACK ARRAY CONFIRMED UPLOADED TO MINIO",
         ['dlirs stack array uploaded to minio']),
        (lecar, "LECAR LRU SIZE ARRAY CONFIRMED UPLOADED TO MINIO",
         ['lecar lru size array uploaded to minio']),
        (arc, "ARC P VALUE ARRAY CONFIRMED UPLOADED TO MINIO",
         ['arc p size array uploaded to minio']),
        (cacheus,
         "CACHEUS LEARNING RATE, LRU VALUE AND Q VALUE CONFIRMED UPLOADED TO MINIO",
         ['cacheus learning rate array uploaded to minio',
          'cacheus lru size array uploaded to minio',
          'cacheus q value array uploaded to minio']),
    ]
    for flag, message, fields in confirmations:
        if flag is not None:  # was `!= None`
            pretty.success(message)
            for field in fields:
                _stamp(field)
def as_minio_confirm(traceId, traceName, algname, cache_size):
    """Verify that all arrays for one algorithm run exist in minio.

    Checks the algorithm-specific arrays first (dlirs / lecar / arc /
    cacheus), then the hit-rate, miss-rate and pollution arrays, stamping
    each confirmation in mongo via storage.trace_run_confirmed().
    Returns True only when every expected object exists.
    """
    fullTraceName = traceName + " " + algname + " " + str(cache_size)
    toMinio = Minio_Module.Minio_Module()
    pretty.utility("CONFIRMING IF ARRAYS FOR " + fullTraceName +
                   " WERE UPLOADED SUCCESSFULLY")
    hit_rate_id = traceId + '-hit_rate'
    miss_rate_id = traceId + '-miss_rate'
    pollution_id = traceId + '-pollution'
    if algname.lower() == 'dlirs':
        if toMinio.exist(traceId + '-dlirs_stack'):
            storage.trace_run_confirmed(traceId, dlirs=1)
        else:
            return False
    if algname.lower() == 'lecar':
        if toMinio.exist(traceId + '-lecar_lru'):
            storage.trace_run_confirmed(traceId, lecar=1)
        else:
            return False
    if algname.lower() == 'arc':
        if toMinio.exist(traceId + '-arc_p'):
            storage.trace_run_confirmed(traceId, arc=1)
        else:
            return False
    if algname.lower() == 'cacheus':
        if (toMinio.exist(traceId + '-cacheus_learning_rate')
                and toMinio.exist(traceId + '-cacheus_lru')
                and toMinio.exist(traceId + '-cacheus_q')):
            storage.trace_run_confirmed(traceId, cacheus=1)
        else:
            return False
    if toMinio.exist(hit_rate_id):
        storage.trace_run_confirmed(traceId, hit_rate=1)
    else:
        pretty.failure("Hit rate array for " + fullTraceName +
                       " to minio failed")
        return False
    # Bug fix: the next two checks previously tested hit_rate_id again, so a
    # missing miss-rate or pollution array went undetected.
    if toMinio.exist(miss_rate_id):
        storage.trace_run_confirmed(traceId, miss_rate=1)
    else:
        pretty.failure("Miss rate array for " + fullTraceName +
                       " to minio failed")
        return False
    if toMinio.exist(pollution_id):
        storage.trace_run_confirmed(traceId, pollution=1)
    else:
        pretty.failure("Pollution rate array for " + fullTraceName +
                       " to minio failed")
        return False
    pretty.success(
        "==============================" + fullTraceName +
        " succesfully uploaded ==========================================")
    return True
def mongo_new_paths(config):
    """Return the configured trace names that are not yet stored in mongo.

    For each trace generated from config['traces'], queries mongo via
    storage.find_trace(); missing traces are collected and returned, while
    known traces have their stored summary statistics printed.
    """
    pretty.utility("CONTACTING MONGODB... CHECKING FOR NEW PATHS")
    newTraces = []
    for traces in config['traces']:
        for trace_name in sg.generateTraceNames(traces):
            mongo_trace = storage.find_trace(trace_name)
            # Robustness fix: find_trace is compared against False here but
            # against None elsewhere in this file; a falsy check covers both
            # "missing" conventions without changing the found-trace path.
            if not mongo_trace:
                newTraces.append(trace_name)
            else:
                pretty.success("Trace: " + mongo_trace["trace name"])
                pretty.success("Total number of requests: " +
                               str(mongo_trace['requests']))
                pretty.success("Total number of unique requests: " +
                               str(mongo_trace['unique']))
                pretty.success("Total number of reuses: " +
                               str(mongo_trace['reuses']))
                pretty.success("Total number of writes: " +
                               str(mongo_trace['writes']))
                pretty.success("Total time: " + str(mongo_trace['time']))
                pretty.success('\n')
    return newTraces
def mongo_get_trace_run_plot_data(trace_name, plot, minio_get, app_id=None):
    """Fetch per-trace plot data (an npz archive) from minio.

    If the trace is unknown to mongo, the trace data is generated first via
    sg.sf_singleTraceGen(); if the object is missing from minio, generation
    is retried with minio_only=True.  Returns the loaded numpy archive on
    success, False otherwise.
    """
    print(plot, "********************************")
    print(trace_name)
    trace = storage.find_trace(trace_name)
    if not trace:  # find_trace signals "missing" with False or None elsewhere
        pretty.utility("plot data for " + trace_name + plot +
                       " was not found, inserting into database...")
        if app_id is not None:
            # Bug fix: the attribute was misspelled 'curret_status', which
            # raised AttributeError whenever app_id was supplied.
            conduit.current_status[
                app_id] = "plot data for " + trace_name + " " + plot + " was not found, inserting into database..."
        confirmed = sg.sf_singleTraceGen(trace_name, app_id)
        trace = storage.find_trace(trace_name)
        trace_id = trace.get("_id")
        minio_id = str(trace_id) + plot
        tmp = tempfile.NamedTemporaryFile(suffix='.npz')
        if confirmed:
            if minio_get.retrieve(tmp.name, minio_id):
                # Bug fix: previously returned the NamedTemporaryFile object
                # here, while every other path returns the loaded archive.
                return np.load(tmp.name, allow_pickle=True)
            return False
        pretty.failure("error uploading trace data to database")
        return False
    trace_id = trace.get("_id")
    minio_id = str(trace_id) + plot
    pretty.utility("retrieving plot data from minio...")
    tmp = tempfile.NamedTemporaryFile(mode='w+b', suffix='.npz')
    if minio_get.retrieve(tmp.name, minio_id):
        pretty.success("plot data for " + trace_name + plot +
                       " successfully downloaded")
        return np.load(tmp.name, allow_pickle=True)
    pretty.failure("data not found in minio store... processing trace data...")
    # Object missing: regenerate the minio payloads and retry once.
    sg.sf_singleTraceGen(trace_name, minio_only=True)
    if minio_get.retrieve(tmp.name, minio_id):
        return np.load(tmp.name, allow_pickle=True)
    print("error uploading data to minio")
    return False