def generate_reqs(traces, totalTimeLength, unitLength, starts):
    """Merge several traces into one interleaved ".req" file.

    For every trace the request lines are loaded with ``load_lines``; a
    simulated disk is then created through ``mkdir`` whose size is the span
    of block ids seen in that trace, and the smallest block id is stored in
    the global ``trace_block_bigin``.

    The requests are written to the mix file in rounds: in each round every
    trace, in order, emits its pending requests whose start-relative time is
    not beyond the current window end, then the window end advances by
    ``unitLength``.  Writing stops when the window end exceeds
    ``totalTimeLength`` or no trace has pending requests.

    Side effects: sets the global ``traceFileName`` and writes that file.

    Args:
        traces: Trace identifiers; also used as the first column of each
            output line and as the suffix of the simulated disk path.
        totalTimeLength: Forwarded to ``load_lines`` and used as the upper
            bound for the window end.
        unitLength: Width of one interleaving window.
        starts: Per-trace start offsets, forwarded to ``load_lines`` and
            subtracted from every request time.

    Returns:
        A list holding one dict per trace, filled by ``load_lines``.

    Raises:
        ValueError: if a trace yields no request lines (``max`` of an empty
            list), as in the original implementation.
    """
    global traceFileName

    print("traces", traces)
    count = len(traces)
    lines = [[] for _ in range(count)]
    idxlist = [0] * count
    uclnDict = [{} for _ in range(count)]

    traceFileName = path + "mix" + "_" + str(time.process_time()) + ".req"
    # The context manager closes the mix file even when loading or parsing fails.
    with open(traceFileName, 'w') as logfile:
        for i, trace in enumerate(traces):
            load_lines(path, trace, totalTimeLength, starts[i], lines[i], uclnDict[i])

            # Size the simulated disk from the block ids (4th space-separated
            # field of each line) and remember where the disk starts.
            bids = [int(line.strip().split(' ')[3]) for line in lines[i]]
            highest = max(bids)
            lowest = min(bids)
            mkdir(diskpath + trace, highest - lowest + 1)
            trace_block_bigin[trace] = lowest

        # Window bounds are relative to each trace's own start offset.
        timeUnitEnd = unitLength
        timeEnd = totalTimeLength
        while True:
            for i, trace in enumerate(traces):
                pending = lines[i]
                while idxlist[i] < len(pending):
                    (mytime, rw, blkid) = parse_line(pending[idxlist[i]], "gen")
                    mytime -= starts[i]
                    if mytime > timeUnitEnd:
                        break
                    print(trace, mytime, rw, blkid, file=logfile)
                    idxlist[i] += 1

            timeUnitEnd += unitLength
            if timeUnitEnd > timeEnd:
                break
            # Stop early once every trace has been fully consumed.
            if not any(idxlist[i] < len(lines[i]) for i in range(count)):
                break

    # Diagnostic: loaded vs. emitted request count per trace.
    for i, trace in enumerate(traces):
        print(trace, len(lines[i]), idxlist[i])
    return uclnDict
def mtc_test_size_p(path, traceID, totalTimeLength, timeStart, sizerate, p):
    """Replay one trace through a single ``PLRU`` cache and log the outcome.

    The cache size is ``int(sizerate * len(uclnDict))`` where ``uclnDict`` is
    filled by ``load_lines``.  When that size is 100 or less nothing is
    simulated and nothing is logged.

    Args:
        path: Forwarded to ``load_lines``.
        traceID: Trace identifier, forwarded to ``load_lines`` and logged.
        totalTimeLength: Forwarded to ``load_lines``; also scales the write
            budget used for the cost figure.
        timeStart: Forwarded to ``load_lines``; logged divided by
            ``totalTimeLength``.
        sizerate: Fraction of ``len(uclnDict)`` used as cache size.
        p: Second constructor argument of ``PLRU``.

    Returns:
        None.  One comma-separated record is appended to ``logFilename``.
    """
    global g

    lines = []
    uclnDict = {}
    load_lines(path, traceID, totalTimeLength, timeStart, lines, uclnDict)

    size = int(sizerate * len(uclnDict))
    # Sparse period: the resulting cache size is too small, skip it.
    if size <= 100:
        return
    print("size=", size)

    ssd = PLRU(size, p)
    req = len(lines)
    readReq = 0
    for line in lines:
        # The timestamp is unused; "_" also avoids shadowing the time module.
        (_, rw, blockid) = parse_line(line, "gen")
        hit = ssd.is_hit(blockid)
        if rw == 0:
            readReq += 1
        if rw == 1 and hit:
            ssd.add_update()
        ssd.update_cache(blockid)

    print(traceID, "size", size, p)
    print("total hit rate", 1.0 * ssd.hit / req, "update", ssd.update)

    # Cost is the cache size unless the update count exceeds the budget
    # size * g * totalTimeLength, in which case it is derived from the updates.
    budget = 1.0 * size * g * totalTimeLength
    if ssd.update > budget:
        cost = 1.0 * ssd.update / g / totalTimeLength
    else:
        cost = size
    print(ssd.update, size, g * totalTimeLength, budget, 1.0 * ssd.update / size, cost)

    # The context manager closes the log file even if formatting the record fails.
    with open(logFilename, "a") as logFile:
        print(traceID, timeStart / totalTimeLength, totalTimeLength / danwei,
              sizerate, size, p, 1.0 * ssd.hit / req, ssd.update, req,
              round(1.0 * readReq / req, 3), cost, sep=',', file=logFile)
def generate_reqs(traces, totalTimeLength, unitLength, starts):
    """Merge several traces into one interleaved ".req" file.

    Request lines are loaded per trace with ``load_lines`` and written to the
    mix file in rounds: in each round every trace, in order, emits its
    pending requests whose start-relative time is not beyond the current
    window end, then the window end advances by ``unitLength``.  Writing
    stops when the window end exceeds ``totalTimeLength`` or no trace has
    pending requests.

    Side effects: sets the global ``traceFileName`` and writes that file.

    Args:
        traces: Trace identifiers; also the first column of each output line.
        totalTimeLength: Forwarded to ``load_lines`` and used as the upper
            bound for the window end.
        unitLength: Width of one interleaving window.
        starts: Per-trace start offsets, forwarded to ``load_lines`` and
            subtracted from every request time.

    Returns:
        A list holding one dict per trace, filled by ``load_lines``.
    """
    global traceFileName

    print("traces", traces)
    count = len(traces)
    lines = [[] for _ in range(count)]
    idxlist = [0] * count
    uclnDict = [{} for _ in range(count)]

    # time.clock() was removed in Python 3.8; process_time() is its replacement.
    traceFileName = path + "mix" + "_" + str(time.process_time()) + ".req"
    # The context manager closes the mix file even when loading or parsing fails.
    with open(traceFileName, 'w') as logfile:
        for i, trace in enumerate(traces):
            load_lines(path, trace, totalTimeLength, starts[i], lines[i], uclnDict[i])

        # Window bounds are relative to each trace's own start offset.
        timeUnitEnd = unitLength
        timeEnd = totalTimeLength
        while True:
            for i, trace in enumerate(traces):
                pending = lines[i]
                while idxlist[i] < len(pending):
                    (mytime, rw, blkid) = parse_line(pending[idxlist[i]], "gen")
                    mytime -= starts[i]
                    if mytime > timeUnitEnd:
                        break
                    print(trace, mytime, rw, blkid, file=logfile)
                    idxlist[i] += 1

            timeUnitEnd += unitLength
            if timeUnitEnd > timeEnd:
                break
            # Stop early once every trace has been fully consumed.
            if not any(idxlist[i] < len(lines[i]) for i in range(count)):
                break

    # Diagnostic: loaded vs. emitted request count per trace.
    for i, trace in enumerate(traces):
        print(trace, len(lines[i]), idxlist[i])
    return uclnDict
def process(traces, starts, totalTimeLength, unitLength, bsizeRate, csizeRate, policy):
    """Replay the mixed request stream through per-trace caches on a simulated SSD.

    Steps, as visible in this function:

    1. ``generate_reqs`` builds the mixed request file and ``get_reqs``
       reads the requests back.
    2. One ``mtc_data_structure.Cache`` is created per trace and a shared
       ``mtc_data_structure.Device`` is sized to the total cache size.
    3. The global ``free_list`` receives one contiguous range of SSD block
       ids per trace; the global ``ssdDict`` maps cached block ids to their
       SSD slot and write-back flag.
    4. Every request is passed to its cache (``do_req``) and the matching
       SSD/disk read or write helper is invoked.
    5. Cache configurations are changed on demand (``do_req`` asks for it
       and ``policy["interval"]`` has elapsed for that trace) and at the end
       of every period.

    The mixed request file is removed at the end.

    Args:
        traces: Trace identifiers.
        starts: Per-trace start offsets, forwarded to ``generate_reqs``.
        totalTimeLength: Forwarded to ``generate_reqs``.
        unitLength: Forwarded to ``generate_reqs``.
        bsizeRate: Forwarded to ``Cache``; numerator of the initial ``p``.
        csizeRate: Forwarded to ``Cache``; denominator of the initial ``p``.
        policy: Mapping read through the keys ``"interval"`` and ``"watch"``.
            ``policy["watch"]`` is indexed at 0 (enable flag), 1 (compared
            against the request time to stop early), 2 (handed to
            ``record_process``/``print_watch``) and -1 (period modulus).

    Returns:
        None.
    """
    global g
    uclnDict = generate_reqs(traces, totalTimeLength, unitLength, starts)
    for i, trace in enumerate(traces):
        print(trace, "ucln=", len(uclnDict[i]))

    # init
    cacheDict = {}
    p = (1, round(bsizeRate/csizeRate, 1))
    dimdm = {}
    print(">>??")
    # There are as many caches as traces.
    for i, trace in enumerate(traces):
        # Author's note: the caches differ only in ucln, i.e. the size of the
        # cache space the trace will access.
        cacheDict[trace] = mtc_data_structure.Cache(
            trace, bsizeRate, csizeRate, len(uclnDict[i]), p, policy)
        dimdm[trace] = policy["interval"]  # time interval
        # Per-trace mapping table: block id -> SSD slot information.
        ssdDict[trace] = {}

    # g = tbw / lifespan / capacity
    # Author's note: g is the baseline number of writes per block per time
    # unit (10^-7 s).  k1 is not included; it is assumed to be 1, i.e.
    # renting 1 B for 1 time unit costs 1.
    device = mtc_data_structure.Device(get_total_size(cacheDict, "cache"), g, cacheDict)

    # Simulate the SSD and hand out the initial free lists, allocating block
    # ids sequentially according to each cache's size.
    mkdir(diskpath+"ssd", get_total_size(cacheDict, "cache"))
    tempbidbegin = 0
    for trace in traces:
        cache_size = cacheDict[trace].cache.size
        # The range must end at offset + size; ending at size alone left every
        # trace after the first with a wrong (often empty) block range.
        free_list[trace] = list(range(tempbidbegin, tempbidbegin + cache_size))
        tempbidbegin += cache_size

    periodStart = 0
    periodLength = 60*danwei
    reqs = get_reqs(traces)  # all requests of the mixed stream
    print("Reqs = ", len(reqs))

    # time.clock() was removed in Python 3.8; process_time() is its replacement.
    timestart = time.process_time()
    debugCount = 0
    # Handle every request in turn.
    for req in reqs:
        (trace, mytime, rw, blkid) = parse_line(req, "get")  # decoded from the mix file
        # The cache decides hit/miss and whether an immediate reconfiguration is needed.
        (needInmediateM, hit, update, evicted) = cacheDict[trace].do_req(rw, blkid)

        # After the cache bookkeeping, perform the actual SSD / disk I/O.
        if update:
            # Cache miss that has to be brought into the cache.
            if evicted is None:
                # Nothing was evicted, so a fresh free slot must be taken.
                if len(free_list[trace]) < 1:
                    print("error: no free space")
                sbid = free_list[trace].pop()  # take a free SSD block from the tail
            else:
                # An eviction is required first.
                sbid = evicte_in_ssd(trace, evicted)
            # Load the block into the SSD.
            buffer = read_disk(trace, blkid)
            write_ssd(sbid, buffer)
            # Add the entry to the mapping table; not modified yet, so no
            # write-back is needed at this point.
            ssdDict[trace][blkid] = {}
            ssdDict[trace][blkid]['ssd_bid'] = sbid
            ssdDict[trace][blkid]['needwb'] = 0
            if rw == 1:
                # Write request.
                write_ssd(sbid, tempbuffer)
                ssdDict[trace][blkid]['needwb'] = 1
            else:
                # Read request.  Open question from the author: is a second read needed here?
                ssd_read(sbid)
        elif hit:
            # Cache hit: look the SSD slot up in the mapping table.
            sbid = ssdDict[trace][blkid]['ssd_bid']
            if rw == 1:
                write_ssd(sbid, tempbuffer)
                ssdDict[trace][blkid]['needwb'] = 1
            else:
                ssd_read(sbid)
        else:
            # Bypass the cache and access the disk directly.
            if rw == 1:
                write_disk(trace, blkid, tempbuffer)
            else:
                read_disk(trace, blkid)

        # Hit ratio too low: trigger an on-demand reconfiguration.
        if needInmediateM and mytime-dimdm[trace] >= policy["interval"]:
            dimdm[trace] = mytime
            # Candidate changes (deltas, deltap) that could raise the hit ratio.
            schemel = cacheDict[trace].get_hit_scheme()
            # Returns the accepted (deltas, deltap), or None when nothing fits.
            temp = device.try_modify(schemel)
            if temp is None:
                # Not enough device space for any scheme: force a
                # reconfiguration of all caches.
                debugCount += 1
                if debugCount % 100 == 0:
                    print("mydebugCount", debugCount)
                # Candidates of every trace except the one being enlarged.
                # Author's note: schemel could logically be added to the
                # candidates too, but it may contain non-sample entries whose
                # update value is wrong; also get_best_config optimises for
                # writes first while schemel optimises for hit ratio first.
                # A separate loop name is used so `trace` keeps referring to
                # the cache that triggered this update.
                potentials = []
                for other in traces:
                    if other == trace:
                        continue
                    potentials.append(cacheDict[other].get_potential())

                for scheme in schemel:
                    (dlts, dltp, thit) = scheme
                    tsize = cacheDict[trace].cache.size+dlts
                    (result, availSize) = device.get_best_config(potentials, tsize)
                    # Check for None before calling len() on the result.
                    if result is None or len(result) == 0:
                        continue
                    assert len(result) <= len(traces)-1

                    sign = False
                    # Author's question: only the triggering trace's size is counted here?
                    device.usedSize = cacheDict[trace].cache.size
                    for i in range(len(traces)):
                        if traces[i] == trace:
                            sign = True
                            continue
                        # result has no entry for the triggering trace, so
                        # indices after it are shifted by one.
                        item = result[i-1] if sign else result[i]
                        # Author's note: try_modify already updates device.usedSize.
                        device.try_modify([(item[0], item[1], None)])
                        # Apply the new configuration to that cache.
                        (dellist, freenode) = cacheDict[traces[i]].change_config(item[0], item[1])

                    # Give all the remaining size to the cache whose hit ratio is too low.
                    availSize = device.size-device.usedSize
                    for i in range(len(schemel)):
                        (tsize, tp, thit) = schemel[i]
                        schemel[i] = (availSize, tp, thit)
                    temp = device.try_modify(schemel)
                    (deltas, deltap) = temp
                    s = deltas + cacheDict[trace].cache.get_size()
                    p = deltap + cacheDict[trace].cache.get_p()
                    # Apply the new configuration.
                    (dellist, freenode) = cacheDict[trace].change_config(s, p)
                    break
            else:
                # Enough device space: change only this trace's configuration.
                # (try_modify was already called above.)
                (deltas, deltap) = temp
                s = deltas + cacheDict[trace].cache.get_size()
                p = deltap + cacheDict[trace].cache.get_p()
                (dellist, freenode) = cacheDict[trace].change_config(s, p)

        # A period has ended: reconfigure all caches.
        if mytime - periodStart >= periodLength:
            print("one period stop!", mytime/periodLength)
            if policy["watch"][0]:
                if policy["watch"][1] < mytime:
                    break
                elif int(mytime/periodLength) % policy["watch"][-1] == 0:
                    record_process(policy["watch"][2], cacheDict)
            periodStart = mytime

            # Collect the candidate set of every trace; give up at the first empty one.
            potentials = []
            for candidate_trace in traces:
                l = cacheDict[candidate_trace].get_potential()
                if len(l) == 0:
                    print("debug 候选集为空")
                    break
                potentials.append(l)

            if len(potentials) < len(traces):
                # At least one trace had no candidates.
                result = []
            else:
                (result, tsize) = device.get_best_config(potentials, 0)  # best configuration
            assert len(result) <= len(traces)
            if len(result) == 0:
                continue  # nothing to apply; go on with the next request

            # Reset the accounting so the new plan cannot be rejected.
            device.usedSize = 0
            for i in range(len(traces)):
                temp = device.try_modify([(result[i][0], result[i][1], None)])
                assert temp is not None
                # Apply the new configuration.
                (dellist, freenode) = cacheDict[traces[i]].change_config(result[i][0], result[i][1])
                cacheDict[traces[i]].init_samples()
            print("after config", device.usedSize)

    if policy["watch"][0]:
        print_watch(policy["watch"][2], cacheDict, policy["watch"][-1]*periodLength)
    for cache_trace in traces:
        cacheDict[cache_trace].finish()
    runTime = time.process_time()-timestart  # time spent running the caches
    print("consumed", runTime, "s")
    # NOTE(review): the original had a bare tuple expression here
    # (traces, device, cacheDict, totalTimeLength, starts, periodLength,
    # (bsizeRate, csizeRate), policy, runTime) with no callee, which did
    # nothing.  It looks like a result-reporting call lost its function name;
    # confirm the intended callee and its signature before restoring it.
    os.remove(traceFileName)
def process(traces, starts, totalTimeLength, unitLength, bsizeRate, csizeRate, policy):
    """Replay the mixed request stream through per-trace caches and report results.

    Steps, as visible in this function:

    1. ``generate_reqs`` builds the mixed request file and ``get_reqs``
       reads the requests back.
    2. One ``mtc_data_structure.Cache`` is created per trace and a shared
       ``mtc_data_structure.Device`` is sized to the total cache size.
    3. Every request is passed to its cache (``do_req``).
    4. Cache configurations are changed on demand (``do_req`` asks for it
       and ``policy["interval"]`` has elapsed for that trace) and at the end
       of every period; the CPU time spent there is accumulated.
    5. ``print_result`` is called with the run statistics and the mixed
       request file is removed.

    Args:
        traces: Trace identifiers.
        starts: Per-trace start offsets, forwarded to ``generate_reqs``.
        totalTimeLength: Forwarded to ``generate_reqs`` and ``print_result``.
        unitLength: Forwarded to ``generate_reqs``.
        bsizeRate: Forwarded to ``Cache``; numerator of the initial ``p``.
        csizeRate: Forwarded to ``Cache``; denominator of the initial ``p``.
        policy: Mapping read through the keys ``"interval"`` and ``"watch"``.
            ``policy["watch"]`` is indexed at 0 (enable flag), 1 (compared
            against the request time to stop early), 2 (handed to
            ``record_process``/``print_watch``) and -1 (period modulus).

    Returns:
        None.
    """
    global g
    uclnDict = generate_reqs(traces, totalTimeLength, unitLength, starts)
    for i, trace in enumerate(traces):
        print(trace, "ucln=", len(uclnDict[i]))

    # init
    cacheDict = {}
    p = (1, round(bsizeRate / csizeRate, 1))
    print(p)
    dimdm = {}
    for i, trace in enumerate(traces):
        cacheDict[trace] = mtc_data_structure.Cache(
            trace, bsizeRate, csizeRate, len(uclnDict[i]), p, policy)
        dimdm[trace] = policy["interval"]

    # g = tbw / lifespan / capacity
    print("cache size = ", get_total_size(cacheDict, "cache"))
    # Author's note: g is the baseline number of writes per block per time
    # unit (10^-7 s).  k1 is not included; it is assumed to be 1, i.e.
    # renting 1 B for 1 time unit costs 1.
    device = mtc_data_structure.Device(get_total_size(cacheDict, "cache"), g, cacheDict)

    periodStart = 0
    periodLength = 600 * danwei
    reqs = get_reqs(traces)
    print("Reqs = ", len(reqs))

    # Accumulated time spent in (re)configuration.
    total_config_time = 0
    # time.clock() was removed in Python 3.8; process_time() is its replacement.
    timestart = time.process_time()
    debugCount = 0
    # Handle every request in turn.
    for req in reqs:
        (trace, mytime, rw, blkid) = parse_line(req, "get")
        (needInmediateM, hit, update, evicted) = cacheDict[trace].do_req(rw, blkid)

        # Hit ratio too low: trigger an on-demand reconfiguration.
        if needInmediateM and mytime - dimdm[trace] >= policy["interval"]:
            configbegin = time.process_time()
            dimdm[trace] = mytime
            schemel = cacheDict[trace].get_hit_scheme()
            temp = device.try_modify(schemel)
            if temp is None:
                # Not enough device space: force a reconfiguration of all caches.
                debugCount += 1
                if debugCount % 100 == 0:
                    print("mydebugCount", debugCount)
                # Candidates of every trace except the one being enlarged.
                # Author's note: schemel could logically be added to the
                # candidates too, but it may contain non-sample entries whose
                # update value is wrong; also get_best_config optimises for
                # writes first while schemel optimises for hit ratio first.
                # A separate loop name is used so `trace` keeps referring to
                # the cache that triggered this update.
                potentials = []
                for other in traces:
                    if other == trace:
                        continue
                    potentials.append(cacheDict[other].get_potential())

                for scheme in schemel:
                    (dlts, dltp, thit) = scheme
                    tsize = cacheDict[trace].cache.size + dlts
                    (result, availSize) = device.get_best_config(potentials, tsize)
                    # Check for None before calling len() on the result.
                    if result is None or len(result) == 0:
                        continue
                    assert len(result) <= len(traces) - 1

                    sign = False
                    device.usedSize = cacheDict[trace].cache.size
                    for i in range(len(traces)):
                        if traces[i] == trace:
                            sign = True
                            continue
                        # result has no entry for the triggering trace, so
                        # indices after it are shifted by one.
                        item = result[i - 1] if sign else result[i]
                        device.try_modify([(item[0], item[1], None)])
                        (dellist, freenode) = cacheDict[traces[i]].change_config(
                            item[0], item[1])

                    # Give all the remaining size to the cache whose hit ratio is too low.
                    availSize = device.size - device.usedSize
                    for i in range(len(schemel)):
                        (tsize, tp, thit) = schemel[i]
                        schemel[i] = (availSize, tp, thit)
                    temp = device.try_modify(schemel)
                    (deltas, deltap) = temp
                    s = deltas + cacheDict[trace].cache.get_size()
                    p = deltap + cacheDict[trace].cache.get_p()
                    (dellist, freenode) = cacheDict[trace].change_config(s, p)
                    break
            else:
                # Enough device space; try_modify was already called above.
                (deltas, deltap) = temp
                s = deltas + cacheDict[trace].cache.get_size()
                p = deltap + cacheDict[trace].cache.get_p()
                (dellist, freenode) = cacheDict[trace].change_config(s, p)
            configend = time.process_time()
            total_config_time += (configend - configbegin)

        # A period has ended: reconfigure all caches.
        if mytime - periodStart >= periodLength:
            configbegin = time.process_time()
            print("one period stop!", mytime / periodLength)
            if policy["watch"][0]:
                if policy["watch"][1] < mytime:
                    break
                elif int(mytime / periodLength) % policy["watch"][-1] == 0:
                    record_process(policy["watch"][2], cacheDict)
            periodStart = mytime

            # Collect the candidate set of every trace; give up at the first empty one.
            potentials = []
            for candidate_trace in traces:
                l = cacheDict[candidate_trace].get_potential()
                if len(l) == 0:
                    print("debug 候选集为空")
                    break
                potentials.append(l)

            if len(potentials) < len(traces):
                # At least one trace had no candidates.
                result = []
            else:
                (result, tsize) = device.get_best_config(potentials, 0)
            assert len(result) <= len(traces)
            if len(result) == 0:
                # Nothing to apply; note this also skips the timing update below.
                continue

            # Reset the accounting so the new plan cannot be rejected.
            device.usedSize = 0
            for i in range(len(traces)):
                temp = device.try_modify([(result[i][0], result[i][1], None)])
                assert temp is not None
                (dellist, freenode) = cacheDict[traces[i]].change_config(
                    result[i][0], result[i][1])
                cacheDict[traces[i]].init_samples()
            print("after config", device.usedSize)
            configend = time.process_time()
            total_config_time += (configend - configbegin)

    if policy["watch"][0]:
        print_watch(policy["watch"][2], cacheDict, policy["watch"][-1] * periodLength)
    for cache_trace in traces:
        cacheDict[cache_trace].finish()
    runTime = time.process_time() - timestart
    print("consumed", runTime, "s")
    print_result(traces, device, cacheDict, totalTimeLength, starts, periodLength,
                 (bsizeRate, csizeRate), policy, runTime, total_config_time)
    os.remove(traceFileName)