def test(job_records, params=None):
    """Estimate a cost for every job record and report it next to measured time.

    For each record a ``MR_cost_model_gumbo`` is built from
    ``create_settings(record.exp, record.opts, params)`` and queried through
    ``get_mr_cost``.  Records whose ``request_bytes`` or ``assert_bytes_r1``
    equals 0 are ignored.

    Args:
        job_records: iterable of records exposing ``exp``, ``opts``, ``job``,
            ``hdfs_bytes_read``, ``request_bytes``, ``assert_bytes_r1``,
            ``totalmaptime``, ``totalreducetime``, ``totalmergetime`` and
            ``totalshuffletime``.
        params: forwarded unchanged as the third argument of
            ``create_settings``.

    Returns:
        Whatever ``report`` returns for a dict mapping ``record.job`` to
        ``(measured_time, estimated_cost, record.opts)``.
    """
    # NOTE: other models (MR_cost_model, MR_cost_model_io, MR_cost_model_basic),
    # debug prints and an INSERT into cost_estimations used to live here as
    # commented-out experiments; only the gumbo model is active.
    bytes_per_mb = 1024.0 ** 2  # float on purpose: keeps every division true
    timings_by_job = {}

    for rec in job_records:
        skip_record = rec.request_bytes == 0 or rec.assert_bytes_r1 == 0
        if skip_record:
            continue

        settings = create_settings(rec.exp, rec.opts, params)
        model = MR_cost_model_gumbo(settings)

        # The HDFS input is divided in two equal halves, and the same half is
        # passed for both of the model's first two size arguments.
        half_input_mb = rec.hdfs_bytes_read / (2 * bytes_per_mb)
        request_mb = rec.request_bytes / bytes_per_mb
        assert_mb = rec.assert_bytes_r1 / bytes_per_mb
        estimate = model.get_mr_cost(
            half_input_mb, half_input_mb, request_mb, assert_mb, True
        )

        measured_time = (
            rec.totalmaptime
            + rec.totalreducetime
            + rec.totalmergetime
            + rec.totalshuffletime
        )
        # Only the first two cost components count towards the estimate
        # (presumably map cost and reduce cost -- confirm against the model).
        estimated_cost = sum(estimate[:2])

        timings_by_job[rec.job] = (measured_time, estimated_cost, rec.opts)

    return report(timings_by_job)
"q5-nogroup-red128": [ (1777777804,444444445,1879155409,544444445), # Gin, gin, Gout, gout (1777777804,444444445,1979155409,544444445), (1777777804,444444445,1979155409,544444445), # Gin, gin, Gout, gout (1777777804,444444445,1879155409,544444445), ], "q5-group-red128": [(1777777804*2, 444444445*2, 4258310818, 1088888890), ], } results = {} for key in inputs.keys(): print key, inputs[key] results[key] = [] cost_model = MR_cost_model_gumbo(create_settings("EXP_022", key, (1, 1, 1, 1, 50, 0.1, 1000))) costs = [] for (Gin, gin, Gout, gout) in inputs[key]: cost = cost_model.get_mr_cost(Gin / (1024**2), gin / (1024**2), Gout / (1024**2), gout / (1024**2), True) costs.append(sum(cost[0:2])) print "-", cost, sum(cost[:2]) print sum(costs) print "~" * 80 print