def plot_max_mem_error(q):
    """Plot max-memory prediction error against training-set size, per query.

    For every query number in ``q`` a training trace
    (``traces/random_tpch_sf10/ran<qno>_200_sf10.json``) and a test trace
    (``traces/tpch-sf10/<NN>.json``, query number padded to two digits) are
    loaded. The training trace is restricted to its first ``n`` query tags
    (in sorted order) for a fixed list of sizes ``n``; for each restriction
    an approximate graph is built for the test trace and the relative error,
    in percent, between ``predictMaxMem`` and ``getMaxMem`` is recorded.
    The per-size errors are printed and saved as a bar chart in
    ``results/memf_error_q<qno>.pdf``.

    Args:
        q: Iterable of query numbers. Each one is compared against ``10``
            and interpolated into file names, so integers are expected.

    Raises:
        ZeroDivisionError: If ``getMaxMem()`` of a test trace is ``0``.
    """
    # Number of training query tags kept for each measurement.
    train_sizes = (1, 5, 10, 15, 20, 25, 30, 40, 50, 75, 100, 125, 150, 175,
                   200)
    # Both memory figures are divided by this before being compared.
    # NOTE(review): presumably a bytes -> GB conversion; confirm the unit
    # returned by predictMaxMem/getMaxMem.
    mem_scale = 1_000_000_000

    blacklist = Utils.init_blacklist("config/mal_blacklist.txt")
    col_stats = ColumnStatsD.fromFile('config/tpch_sf10_stats.txt')

    for qno in q:
        logging.info("Testing query {}".format(qno))
        # Test traces use a two-digit query number. This lives in its own
        # local so the `q` parameter is not rebound while being iterated.
        query_id = "0{}".format(qno) if qno < 10 else "{}".format(qno)

        logging.info("loading training set...")
        train = MalDictionary.fromJsonFile(
            "traces/random_tpch_sf10/ran{}_200_sf10.json".format(qno),
            blacklist, col_stats)
        logging.info("loading test set...")
        test = MalDictionary.fromJsonFile(
            "traces/tpch-sf10/{}.json".format(query_id), blacklist, col_stats)

        # Sorted in place: this reorders the training dictionary's own
        # `query_tags` list, exactly as before.
        train_tags = train.query_tags
        train_tags.sort()

        errors = []
        for size in train_sizes:
            # Slice once per size rather than once per instruction inside
            # the filter predicate.
            selected_tags = train_tags[0:size]
            subset = train.filter(lambda ins: ins.tag in selected_tags)
            print(len(subset.query_tags))
            graph = test.buildApproxGraph(subset)
            predicted = test.predictMaxMem(graph) / mem_scale
            actual = test.getMaxMem() / mem_scale
            errors.append(100 * abs((predicted - actual) / actual))

        print(errors)
        Utils.plotBar(list(train_sizes), errors,
                      "results/memf_error_q{}.pdf".format(qno),
                      'nof training queries', 'error perc')
def plot_allmem_tpch10(path=""):
    """Plot the max-memory prediction error for queries 1 through 22.

    For each query number a training trace
    (``traces/random_tpch10/ran_q<NN>_n200_tpch10.json``) and a test trace
    (``traces/tpch10/<NN>.json``) are loaded, an approximate graph is built
    for the test trace from the training trace, and the relative error, in
    percent, between ``predictMaxMem`` and ``getMaxMem`` is computed and
    printed. All errors are then drawn as one bar chart.

    Args:
        path: Directory in which ``mem_error_1-23.pdf`` is written. The
            default empty string yields the bare file name, i.e. a path
            relative to the current working directory. A trailing
            separator is optional.

    Raises:
        ZeroDivisionError: If ``getMaxMem()`` of a test trace is ``0``.
    """
    # Imported locally so the function does not rely on a module-level
    # `import os` being present.
    import os

    # Both memory figures are divided by this before being compared.
    # NOTE(review): presumably a bytes -> GB conversion; confirm the unit
    # returned by predictMaxMem/getMaxMem.
    mem_scale = 1_000_000_000
    query_numbers = range(1, 23)

    blacklist = Utils.init_blacklist("config/mal_blacklist.txt")
    col_stats = ColumnStatsD.fromFile('config/tpch_sf10_stats.txt')

    errors = []
    for qno in query_numbers:
        # Trace files use a two-digit query number.
        query_id = "0{}".format(qno) if qno < 10 else "{}".format(qno)
        logging.info("Examining Query: {}".format(query_id))
        train = MalDictionary.fromJsonFile(
            "traces/random_tpch10/ran_q{}_n200_tpch10.json".format(query_id),
            blacklist, col_stats)
        test = MalDictionary.fromJsonFile(
            "traces/tpch10/{}.json".format(query_id), blacklist, col_stats)
        graph = test.buildApproxGraph(train)
        predicted = test.predictMaxMem(graph) / mem_scale
        actual = test.getMaxMem() / mem_scale
        err = 100 * abs((predicted - actual) / actual)
        print("query: {}, pred mem: {}, actual mem: {}, error {}".format(
            qno, predicted, actual, err))
        errors.append(err)

    # NOTE(review): this prints only the last query's error, whereas
    # plot_max_mem_error prints its whole error list; confirm the intent.
    print(err)

    # os.path.join inserts a separator only when `path` lacks one, so both
    # "results" and "results/" resolve to the same file.
    outf = os.path.join(path, "mem_error_1-23.pdf")
    # NOTE(review): x holds query numbers and y holds errors, yet the labels
    # are passed as ('error perc', 'query no'); plot_max_mem_error passes
    # its x description first. Confirm against Utils.plotBar's signature.
    Utils.plotBar(query_numbers, errors, outf, 'error perc', 'query no')