def add_latency_measure(g, central_cube, root_node, tti, hti, latencylog, interval=100):
  # tti, hti: time tuple-index and host tuple-index
  latency_measure_op = jsapi.LatencyMeasureSubscriber(g, tti, hti, interval_ms=interval)
  latency_measure_op.instantiate_on(root_node)
  echo_op = jsapi.Echo(g)
  echo_op.set_cfg("file_out", latencylog)
  echo_op.instantiate_on(root_node)
  g.chain([central_cube, latency_measure_op, echo_op])
def main():
  parser = OptionParser()
  parser.add_option("-C", "--config", dest="config_file",
                    help="read config from FILE", metavar="FILE")
  parser.add_option("-a", "--controller", dest="controller",
                    help="controller address", default="localhost:3456")
  (options, args) = parser.parse_args()

  serv_addr, serv_port = normalize_controller_addr(options.controller)
  file_to_parse = args[0]

  k2 = 20  # how many to pull to top level
  k = 10   # how many to display

  ### Define the graph abstractly, without a computation
  g = jsapi.QueryGraph()
  reader = jsapi.FileRead(g, file_to_parse)
  parse = jsapi.GenericParse(g, ".*GET ([^ ]*) .*", "s")

  local_cube = g.add_cube("local_results")
  local_cube.add_dim("url", Element.STRING, 0)
  # cube.add_dim("hostname", Element.STRING, 1)
  local_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 1)
  local_cube.set_overwrite(True)  # fresh results

  pull_k2 = jsapi.TimeSubscriber(g, {}, 2000, "-count", k2)
  echo = jsapi.Echo(g)
  # local_cube = jsapi.Echo(g)

  g.connect(reader, parse)
  g.connect(parse, local_cube)
  g.connect(local_cube, pull_k2)
  g.connect(pull_k2, echo)
  # Should do a pull into a consolidated cube

  #### Finished building in memory, now to join
  server = RemoteController((serv_addr, serv_port))
  n = server.get_a_node()
  assert isinstance(n, NodeID)
  all_nodes = server.all_nodes()

  local_cube.instantiate_on(all_nodes)
  server.deploy(g)
def test_dummy_edges(self):
  g = QueryGraph()
  f = jsapi.FileRead(g, "some file")
  echo_op = jsapi.Echo(g)
  g.connect(f, echo_op, bwLimit=0)

  pb = g.get_deploy_pb()
  self.assertEquals(len(pb.alter[0].edges), 0)

  echo_op.set_inlink_bwcap(100)
  pb = g.get_deploy_pb()
  self.assertEquals(len(pb.alter[0].edges), 1)
  self.assertEquals(pb.alter[0].edges[0].max_kb_per_sec, 100.0)
def main():
  parser = standard_option_parser()
  (options, args) = parser.parse_args()

  all_nodes, server = get_all_nodes(options)
  root_node = find_root_node(options, all_nodes)
  source_nodes = get_source_nodes(options, all_nodes, root_node)

  g = jsapi.QueryGraph()
  start_ts = parse_ts(options.start_ts)

  central_cube = define_raw_cube(g, "global_coral_urls", root_node, overwrite=True)

  if not options.no_echo:
    pull_q = jsapi.TimeSubscriber(g, {}, 30000, sort_order="-count", num_results=10)
    pull_q.set_cfg("ts_field", 0)
    pull_q.set_cfg("start_ts", start_ts)
    pull_q.set_cfg("rollup_levels", "6,0,1")  # every five seconds to match subscription. Roll up counts.
    pull_q.set_cfg("simulation_rate", 1)
    pull_q.set_cfg("window_offset", 6 * 1000)  # but trailing by a few

    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)
    g.chain([central_cube, pull_q, echo])

  tput_merge = jsapi.MultiRoundCoord(g)
  tput_merge.set_cfg("start_ts", start_ts)
  tput_merge.set_cfg("window_offset", 5 * 1000)
  tput_merge.set_cfg("ts_field", 0)
  tput_merge.set_cfg("num_results", 10)
  tput_merge.set_cfg("sort_column", "-count")
  tput_merge.set_cfg("min_window_size", 5)
  tput_merge.set_cfg("rollup_levels", "10,0,1")  # roll up response code and referer
  tput_merge.instantiate_on(root_node)
  g.chain([tput_merge, central_cube])

  for node in source_nodes:
    local_cube = define_raw_cube(g, "local_records", node, overwrite=False)
    # print "cube output dimensions:", local_cube.get_output_dimensions()
    pull_from_local = jsapi.MultiRoundClient(g)
    pull_from_local.instantiate_on(node)
    lastOp = g.chain([local_cube, pull_from_local, tput_merge])

  deploy_or_dummy(options, server, g)
def get_graph(source_nodes, root_node, options):
  ECHO_RESULTS = not options.no_echo

  g = jsapi.QueryGraph()
  BOUND = 100
  start_ts = parse_ts(options.start_ts)

  parsed_field_offsets = [coral_fidxs['timestamp'], coral_fidxs['HTTP_stat'],
                          coral_fidxs['URL_requested'], coral_fidxs['nbytes'],
                          coral_fidxs['dl_utime'], len(coral_fidxs)]

  global_results = g.add_cube("global_slow")
  define_schema_for_raw_cube(global_results, parsed_field_offsets)
  global_results.instantiate_on(root_node)

  congest_logger = jsapi.AvgCongestLogger(g)
  congest_logger.instantiate_on(root_node)
  g.connect(congest_logger, global_results)

  if ECHO_RESULTS:
    pull_q = jsapi.TimeSubscriber(g, {}, 1000)
    pull_q.set_cfg("ts_field", 0)
    pull_q.set_cfg("start_ts", start_ts)
    # pull_q.set_cfg("rollup_levels", "8,1")
    # pull_q.set_cfg("simulation_rate", 1)
    pull_q.set_cfg("window_offset", 6 * 1000)  # but trailing by a few

    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)
    g.chain([global_results, pull_q, echo])

  for node, i in numbered(source_nodes, False):
    f = jsapi.FileRead(g, options.fname, skip_empty=True)
    csvp = jsapi.CSVParse(g, coral_types)
    csvp.set_cfg("discard_off_size", "true")
    round = jsapi.TimeWarp(g, field=1, warp=options.warp_factor)
    round.set_cfg("wait_for_catch_up", "true")
    f.instantiate_on(node)

    filter = jsapi.RatioFilter(g, numer=coral_fidxs['dl_utime'],
                               denom=coral_fidxs['nbytes'], bound=BOUND)
    g.chain([f, csvp, round, filter, congest_logger])

  return g
def generate_and_run(options, all_nodes, server, k):
  root_node = find_root_node(options, all_nodes)
  source_nodes = get_source_nodes(options, all_nodes, root_node)

  g = jsapi.QueryGraph()
  start_ts = parse_ts(options.start_ts)

  central_cube = define_raw_cube(g, "global_coral_urls", root_node, overwrite=True)

  if not options.no_echo:
    pull_q = jsapi.DelayedSubscriber(g, {}, sort_order="-count", num_results=k)
    # pull_q.set_cfg("ts_field", 0)
    # pull_q.set_cfg("start_ts", start_ts)
    pull_q.set_cfg("rollup_levels", "0,0,1")
    pull_q.set_cfg("window_offset", 20 * 1000)  # but trailing by a few

    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)
    g.chain([central_cube, pull_q, echo])

  tput_merge = jsapi.MultiRoundCoord(g)
  # tput_merge.set_cfg("start_ts", start_ts)
  # tput_merge.set_cfg("window_offset", 5 * 1000)
  # tput_merge.set_cfg("ts_field", 0)
  tput_merge.set_cfg("wait_for_start", 10)
  tput_merge.set_cfg("num_results", k)
  tput_merge.set_cfg("sort_column", "-count")
  # tput_merge.set_cfg("min_window_size", 5)
  tput_merge.set_cfg("rollup_levels", "0,0,1")  # roll up time, response code and referer
  tput_merge.instantiate_on(root_node)
  g.chain([tput_merge, central_cube])

  for node in source_nodes:
    local_cube = define_raw_cube(g, "local_records", node, overwrite=False)
    # print "cube output dimensions:", local_cube.get_output_dimensions()
    pull_from_local = jsapi.MultiRoundClient(g)
    pull_from_local.instantiate_on(node)
    lastOp = g.chain([local_cube, pull_from_local, tput_merge])

  deploy_or_dummy(options, server, g)
def main(): parser = standard_option_parser() parser.add_option("--mode", dest="mode", action="store", help="query to run. Should be one of %s" % MODE_LIST) parser.add_option("--wait", dest="wait", action="store", help="how long to wait for results") (options, args) = parser.parse_args() if options.mode: mode = options.mode if len(args) > 0: print "Can't specify mode as both an arg and an option." sys.exit(0) else: if len(args) == 0: print "Must specify a mode. Should be one of %s" % MODE_LIST sys.exit(0) mode = args[0] if mode == "quantiles": define_internal_cube = quant_cube src_to_internal = src_to_quant process_results = process_quant final_rollup_levels = "8,1" elif mode == "urls": define_internal_cube = url_cube src_to_internal = src_to_url process_results = lambda x,y,z: y final_rollup_levels = "8,1,1" #rollup time slightly, rest is unrolled. elif mode == "domains": define_internal_cube = url_cube src_to_internal = src_to_domain process_results = lambda x,y,z: y final_rollup_levels = "8,1,1" #rollup time slightly, rest is unrolled. elif mode == "domains_all": define_internal_cube = dom_notime src_to_internal = drop_time_from_doms process_results = lambda x,y,z: y final_rollup_levels = "1,1" #rollup time slightly, rest is unrolled. elif mode == "slow_reqs": define_internal_cube = url_cube src_to_internal = src_slow_reqs process_results = lambda x,y,z: y final_rollup_levels = "9,1,1" #nothing rolled up. elif mode == "bad_domains": define_internal_cube = bad_doms_cube src_to_internal = src_to_bad_doms process_results = bad_doms_postprocess final_rollup_levels = "8,1,1" #rollup time slightly, rest is unrolled. elif mode == "total_bw": define_internal_cube = bw_cube src_to_internal = src_to_bw process_results = lambda x,y,z: y final_rollup_levels = "8,1" #rollup time slightly, rest is unrolled. elif mode == "bad_referers": define_internal_cube = badreferrer_cube src_to_internal = src_to_badreferrer process_results = badreferrer_out final_rollup_levels = "8,1" #rollup time slightly, rest is unrolled. 
else: print "Unknown mode %s" % mode sys.exit(0) all_nodes,server = get_all_nodes(options) if len(all_nodes) < 1: print "FATAL: no nodes" sys.exit(0) g= jsapi.QueryGraph() ops = [] union_node = find_root_node(options, all_nodes) for node in all_nodes: if node == union_node and not options.generate_at_union: continue raw_cube = define_raw_cube(g, "local_records", node, overwrite=False) raw_cube_sub = jsapi.TimeSubscriber(g, {}, 1000) raw_cube_sub.set_cfg("simulation_rate", options.warp_factor) raw_cube_sub.set_cfg("ts_field", 0) if options.start_ts: raw_cube_sub.set_cfg("start_ts", options.start_ts) # time_shift = jsapi.TimeWarp(g, field=0, warp=options.warp_factor) last_op = g.chain([raw_cube, raw_cube_sub]) #, time_shift]) last_op = src_to_internal(g, last_op, node, options) last_op.instantiate_on(node) ops.append(last_op) if len(ops) == 0: print "can't run, no [non-union] nodes" sys.exit(0) union_cube = define_internal_cube (g, "union_cube", union_node) g.agg_tree(ops, union_cube, start_ts =options.start_ts, sim_rate=options.warp_factor) if options.bw_cap: union_cube.set_inlink_bwcap(float(options.bw_cap)) #This is the final output subscriber pull_q = jsapi.TimeSubscriber(g, {}, 1000) #only for UI purposes pull_q.set_cfg("ts_field", 0) # pull_q.set_cfg("latency_ts_field", 7) if options.start_ts: pull_q.set_cfg("start_ts", options.start_ts) pull_q.set_cfg("rollup_levels", final_rollup_levels) pull_q.set_cfg("simulation_rate", options.warp_factor) pull_q.set_cfg("window_offset", 8* 1000) #...trailing by a few g.connect(union_cube, pull_q) last_op = process_results(g, pull_q, options) echo = jsapi.Echo(g) echo.instantiate_on(union_node) g.connect(last_op, echo) deploy_or_dummy(options, server, g)
def get_graph(source_nodes, root_node, options): g = jsapi.QueryGraph() start_ts = parse_ts(options.start_ts) central_cube = g.add_cube("global_coral_anamolous_quant") central_cube.instantiate_on(root_node) define_quant_cube(central_cube) pull_q = jsapi.TimeSubscriber(g, {}, 1000) pull_q.set_cfg("ts_field", 0) pull_q.set_cfg("start_ts", start_ts) # pull_q.set_cfg("rollup_levels", "8,1") # pull_q.set_cfg("simulation_rate",1) pull_q.set_cfg("window_offset", 6 * 1000) #but trailing by a few q_op = jsapi.Quantile(g, 0.95, field=1) g.chain([central_cube, pull_q, q_op]) thresh_cube = g.add_cube("global_coral_anamalous_thresh") thresh_cube.add_dim("time", CubeSchema.Dimension.TIME_CONTAINMENT, 0) thresh_cube.add_agg("thresh", jsapi.Cube.AggType.COUNT, 1) thresh_cube.set_overwrite(True) thresh_cube.instantiate_on(root_node) if not options.no_echo: echo = jsapi.Echo(g) echo.instantiate_on(root_node) g.chain([q_op, echo, thresh_cube]) else: g.chain([q_op, thresh_cube]) parsed_field_offsets = [coral_fidxs['timestamp'], coral_fidxs['HTTP_stat'],\ coral_fidxs['URL_requested'], coral_fidxs['nbytes'], coral_fidxs['dl_utime'], len(coral_types) ] global_results = g.add_cube("global_anomalous") define_schema_for_raw_cube(global_results, parsed_field_offsets) global_results.instantiate_on(root_node) FILTER_FIELD = coral_fidxs['nbytes'] for node in source_nodes: ################ First do the data loading part f = jsapi.FileRead(g, options.fname, skip_empty=True) csvp = jsapi.CSVParse(g, coral_types) csvp.set_cfg("discard_off_size", "true") round = jsapi.TimeWarp(g, field=1, warp=options.warp_factor) round.set_cfg("wait_for_catch_up", "true") f.instantiate_on(node) local_raw_cube = g.add_cube("local_coral_anamolous_all") define_schema_for_raw_cube(local_raw_cube, parsed_field_offsets) pass_raw = jsapi.FilterSubscriber( g) # to pass through to the summary and q-cube to_summary = jsapi.ToSummary(g, field=FILTER_FIELD, size=100) local_q_cube = g.add_cube("local_coral_anamolous_quant") define_quant_cube(local_q_cube, [coral_fidxs['timestamp'], FILTER_FIELD]) g.chain([ f, csvp, round, local_raw_cube, pass_raw, to_summary, local_q_cube ]) pull_from_local = jsapi.TimeSubscriber(g, {}, 1000) pull_from_local.instantiate_on(node) pull_from_local.set_cfg("simulation_rate", 1) pull_from_local.set_cfg("ts_field", 0) pull_from_local.set_cfg("start_ts", start_ts) pull_from_local.set_cfg("window_offset", 2000) #but trailing by a few local_q_cube.instantiate_on(node) pull_from_local.instantiate_on(node) g.chain([local_q_cube, pull_from_local, central_cube]) ################ Now do the second phase passthrough = jsapi.FilterSubscriber(g) passthrough.instantiate_on(root_node) filter = jsapi.FilterSubscriber(g, cube_field=FILTER_FIELD, level_in_field=1) filter.instantiate_on(node) g.chain([thresh_cube, passthrough, filter]) g.chain([local_raw_cube, filter, global_results]) return g
def get_graph(source_nodes, root_node, options):
  g = jsapi.QueryGraph()

  ANALYZE = not options.load_only
  LOADING = not options.analyze_only
  ECHO_RESULTS = not options.no_echo
  MULTIROUND = options.multiround
  HASH_SAMPLE = options.hash_sample
  LOCAL_THRESH = options.local_thresh

  if not LOADING and not ANALYZE:
    print "can't do neither load nor analysis"
    sys.exit(0)

  start_ts = parse_ts(options.start_ts)

  central_cube = g.add_cube("global_coral_urls")
  central_cube.instantiate_on(root_node)
  define_cube(central_cube)

  if ECHO_RESULTS:
    pull_q = jsapi.TimeSubscriber(g, {}, 5000, sort_order="-count", num_results=10)
    pull_q.set_cfg("ts_field", 0)
    pull_q.set_cfg("start_ts", start_ts)
    pull_q.set_cfg("rollup_levels", "6,0,1")  # every five seconds to match subscription. Roll up counts.
    pull_q.set_cfg("simulation_rate", 1)
    pull_q.set_cfg("window_offset", 6 * 1000)  # but trailing by a few

    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)
    g.chain([central_cube, pull_q, echo])

  add_latency_measure(g, central_cube, root_node, tti=4, hti=5, latencylog=options.latencylog)

  congest_logger = jsapi.AvgCongestLogger(g)
  congest_logger.instantiate_on(root_node)
  congest_logger.set_cfg("field", 3)

  if MULTIROUND:
    tput_merge = jsapi.MultiRoundCoord(g)
    tput_merge.set_cfg("start_ts", start_ts)
    tput_merge.set_cfg("window_offset", 5 * 1000)
    tput_merge.set_cfg("ts_field", 0)
    tput_merge.set_cfg("num_results", 10)
    tput_merge.set_cfg("sort_column", "-count")
    tput_merge.set_cfg("min_window_size", 5)
    tput_merge.set_cfg("rollup_levels", "10,0,1")  # roll up response codes
    tput_merge.instantiate_on(root_node)
    pull_q.set_cfg("window_offset", 10 * 1000)  # but trailing by a few
    g.connect(tput_merge, congest_logger)

  parsed_field_offsets = [coral_fidxs['timestamp'], coral_fidxs['HTTP_stat'],
                          coral_fidxs['URL_requested'], len(coral_types)]

  for node, i in numbered(source_nodes, not LOADING):
    if not options.full_url:
      table_prefix = "local_coral_domains"
    else:
      table_prefix = "local_coral_urls"
    table_prefix += "_" + options.warp_factor
    local_cube = g.add_cube(table_prefix + ("_%d" % i))
    define_cube(local_cube, parsed_field_offsets)
    print "cube output dimensions:", local_cube.get_output_dimensions()

    if LOADING:
      f = jsapi.FileRead(g, options.fname, skip_empty=True)
      csvp = jsapi.CSVParse(g, coral_types)
      csvp.set_cfg("discard_off_size", "true")
      round = jsapi.TimeWarp(g, field=1, warp=options.warp_factor)
      if not options.full_url:
        url_to_dom = jsapi.URLToDomain(g, field=coral_fidxs['URL_requested'])
        g.chain([f, csvp, round, url_to_dom, local_cube])
      else:
        g.chain([f, csvp, round, local_cube])
      f.instantiate_on(node)
    else:
      local_cube.set_overwrite(False)

    if MULTIROUND:
      pull_from_local = jsapi.MultiRoundClient(g)
    else:
      query_rate = 1000 if ANALYZE else 3600 * 1000
      pull_from_local = jsapi.VariableCoarseningSubscriber(g, {}, query_rate)
      pull_from_local.set_cfg("simulation_rate", 1)
      pull_from_local.set_cfg("max_window_size", options.max_rollup)
      pull_from_local.set_cfg("ts_field", 0)
      pull_from_local.set_cfg("start_ts", start_ts)
      pull_from_local.set_cfg("window_offset", 2000)  # but trailing by a few
      pull_from_local.set_cfg("sort_order", "-count")

    pull_from_local.instantiate_on(node)
    local_cube.instantiate_on(node)

    # count_logger = jsapi.CountLogger(g, field=3)
    timestamp_op = jsapi.TimestampOperator(g, "ms")
    hostname_extend_op = jsapi.ExtendOperator(g, "s", ["${HOSTNAME}"])  # used as dummy hostname for latency tracker
    hostname_extend_op.instantiate_on(node)

    lastOp = g.chain([local_cube, pull_from_local])

    if HASH_SAMPLE:
      v = jsapi.VariableSampling(g, field=2, type='S')
      v.set_cfg("steps", options.steps)
      # print "connecting ",
      lastOp = g.connect(lastOp, v)
      g.add_policy([pull_from_local, v])
    elif LOCAL_THRESH:
      v = jsapi.WindowLenFilter(g)
      v.set_cfg("err_field", 3)
      # print "connecting ",
      lastOp = g.connect(lastOp, v)
      g.add_policy([pull_from_local, v])

    g.chain([lastOp, timestamp_op, hostname_extend_op])
    # output: 0=>time, 1=>response_code, 2=>url, 3=>count, 4=>timestamp at source, 5=>hostname

    if MULTIROUND:
      g.connect(hostname_extend_op, tput_merge)
    else:
      g.connect(hostname_extend_op, congest_logger)

  timestamp_cube_op = jsapi.TimestampOperator(g, "ms")
  timestamp_cube_op.instantiate_on(root_node)

  g.chain([congest_logger, timestamp_cube_op, central_cube])
  # input to central cube: 0=>time, 1=>response_code, 2=>url, 3=>count,
  # 4=>timestamp at source, 5=>hostname, 6=>timestamp at union

  if options.bw_cap:
    congest_logger.set_inlink_bwcap(float(options.bw_cap))

  return g
def get_graph(source_nodes, root_node, options):
  ECHO_RESULTS = not options.no_echo
  ANALYZE = not options.load_only
  LOADING = not options.analyze_only

  g = jsapi.QueryGraph()
  start_ts = parse_ts(options.start_ts)

  congest_logger = jsapi.AvgCongestLogger(g)
  congest_logger.instantiate_on(root_node)

  global_respcodes = g.add_cube("global_respcodes")
  define_schema_for_cube(global_respcodes)
  global_respcodes.instantiate_on(root_node)

  global_ratios = g.add_cube("global_ratios")
  define_schema_for_cube(global_ratios)
  global_ratios.add_agg("ratio", jsapi.Cube.AggType.MIN_D, 4)
  global_ratios.instantiate_on(root_node)

  pull_resp = jsapi.TimeSubscriber(g, {}, 1000)
  pull_resp.set_cfg("ts_field", 0)
  pull_resp.set_cfg("start_ts", start_ts)
  pull_resp.set_cfg("rollup_levels", "8,1,1")
  pull_resp.set_cfg("simulation_rate", 1)
  pull_resp.set_cfg("window_offset", 5 * 1000)

  compute_ratio = jsapi.SeqToRatio(g, url_field=2, total_field=3, respcode_field=1)

  g.chain([congest_logger, global_respcodes, pull_resp, compute_ratio, global_ratios])

  if ECHO_RESULTS:
    pull_q = jsapi.TimeSubscriber(g, {}, 1000, num_results=5, sort_order="-ratio")
    pull_q.set_cfg("ts_field", 0)
    pull_q.set_cfg("start_ts", start_ts)
    pull_q.set_cfg("rollup_levels", "8,1,1")
    pull_q.set_cfg("simulation_rate", 1)
    pull_q.set_cfg("window_offset", 12 * 1000)  # but trailing by a few

    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)
    g.chain([global_ratios, pull_q, echo])

  parsed_field_offsets = [coral_fidxs['timestamp'], coral_fidxs['HTTP_stat'],
                          coral_fidxs['URL_requested'], len(coral_fidxs)]

  for node, i in numbered(source_nodes, False):
    table_prefix = "local_coral_respcodes"
    table_prefix += "_" + options.warp_factor
    local_cube = g.add_cube(table_prefix + ("_%d" % i))
    define_schema_for_cube(local_cube, parsed_field_offsets)

    if LOADING:
      f = jsapi.FileRead(g, options.fname, skip_empty=True)
      csvp = jsapi.CSVParse(g, coral_types)
      csvp.set_cfg("discard_off_size", "true")
      round = jsapi.TimeWarp(g, field=1, warp=options.warp_factor)
      round.set_cfg("wait_for_catch_up", "true")
      f.instantiate_on(node)
      url_to_dom = jsapi.URLToDomain(g, field=coral_fidxs['URL_requested'])
      g.chain([f, csvp, round, url_to_dom, local_cube])
    else:
      local_cube.set_overwrite(False)

    query_rate = 1000 if ANALYZE else 3600 * 1000
    pull_from_local = jsapi.TimeSubscriber(g, {}, query_rate)
    pull_from_local.instantiate_on(node)
    pull_from_local.set_cfg("simulation_rate", 1)
    pull_from_local.set_cfg("ts_field", 0)
    pull_from_local.set_cfg("start_ts", start_ts)
    pull_from_local.set_cfg("window_offset", 2000)  # but trailing by a few

    local_cube.instantiate_on(node)
    pull_from_local.instantiate_on(node)
    g.chain([local_cube, pull_from_local, congest_logger])

  return g
def get_graph(source_nodes, root_node, options):
  g = jsapi.QueryGraph()

  ANALYZE = not options.load_only
  LOADING = not options.analyze_only
  ECHO_RESULTS = not options.no_echo

  if not LOADING and not ANALYZE:
    print "can't do neither load nor analysis"
    sys.exit(0)

  start_ts = parse_ts(options.start_ts)

  central_cube = g.add_cube("global_coral_quant")
  central_cube.instantiate_on(root_node)
  define_cube(central_cube)

  if ECHO_RESULTS:
    pull_q = jsapi.TimeSubscriber(g, {}, 1000)  # every two seconds
    pull_q.set_cfg("ts_field", 0)
    pull_q.set_cfg("latency_ts_field", 7)
    pull_q.set_cfg("start_ts", start_ts)
    pull_q.set_cfg("rollup_levels", "8,1")
    pull_q.set_cfg("simulation_rate", 1)
    pull_q.set_cfg("window_offset", 6 * 1000)  # but trailing by a few

    count_op = jsapi.SummaryToCount(g, 2)
    q_op = jsapi.Quantile(g, 0.95, 3)
    q_op2 = jsapi.Quantile(g, 0.95, 2)
    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)
    g.chain([central_cube, pull_q, count_op, q_op, q_op2, echo])

  latency_measure_op = jsapi.LatencyMeasureSubscriber(g, time_tuple_index=4,
                                                      hostname_tuple_index=5,
                                                      interval_ms=100)  # use field
  echo_op = jsapi.Echo(g)
  echo_op.set_cfg("file_out", options.latencylog)
  echo_op.instantiate_on(root_node)
  g.chain([central_cube, latency_measure_op, echo_op])

  parsed_field_offsets = [coral_fidxs['timestamp'], coral_fidxs['HTTP_stat'],
                          coral_fidxs['nbytes'], coral_fidxs['dl_utime'],
                          len(coral_types)]

  for node, i in numbered(source_nodes, not LOADING):
    local_cube = g.add_cube("local_coral_quant_%d" % i)
    define_cube(local_cube, parsed_field_offsets)
    print "cube output dimensions:", local_cube.get_output_dimensions()

    if LOADING:
      f = jsapi.FileRead(g, options.fname, skip_empty=True)
      csvp = jsapi.CSVParse(g, coral_types)
      csvp.set_cfg("discard_off_size", "true")
      round = jsapi.TimeWarp(g, field=1, warp=options.warp_factor)
      to_summary1 = jsapi.ToSummary(g, field=parsed_field_offsets[2], size=5000)
      to_summary2 = jsapi.ToSummary(g, field=parsed_field_offsets[3], size=5000)
      g.chain([f, csvp, round, to_summary1, to_summary2, local_cube])
      f.instantiate_on(node)
    else:
      local_cube.set_overwrite(False)

    query_rate = 1000 if ANALYZE else 3600 * 1000
    if options.no_backoff:
      pull_from_local = jsapi.TimeSubscriber(g, {}, query_rate)
    else:
      pull_from_local = jsapi.VariableCoarseningSubscriber(g, {}, query_rate)

    pull_from_local.instantiate_on(node)
    pull_from_local.set_cfg("simulation_rate", 1)
    pull_from_local.set_cfg("ts_field", 0)
    pull_from_local.set_cfg("start_ts", start_ts)
    pull_from_local.set_cfg("window_offset", 2000)  # but trailing by a few
    # pull_from_local.set_cfg("rollup_levels", "8,1")
    # pull_from_local.set_cfg("window_size", "5000")

    local_cube.instantiate_on(node)

    count_logger = jsapi.CountLogger(g, field=4)
    timestamp_op = jsapi.TimestampOperator(g, "ms")
    count_extend_op = jsapi.ExtendOperator(g, "i", ["1"])  # why is this here? -asr?
    count_extend_op.instantiate_on(node)  # TODO should get a real hostname here

    timestamp_cube_op = jsapi.TimestampOperator(g, "ms")
    timestamp_cube_op.instantiate_on(root_node)

    g.chain([local_cube, pull_from_local, count_logger, timestamp_op,
             count_extend_op, timestamp_cube_op, central_cube])

    if options.bw_cap:
      timestamp_cube_op.set_inlink_bwcap(float(options.bw_cap))

  # g.chain([local_cube, pull_from_local, count_op, q_op, q_op2, echo])
  return g
def get_graph(source_nodes, root_node, options):
  g = jsapi.QueryGraph()

  ANALYZE = not options.load_only
  LOADING = not options.analyze_only
  ECHO_RESULTS = not options.no_echo
  MULTIROUND = options.multiround
  HASH_SAMPLE = options.hash_sample

  if not LOADING and not ANALYZE:
    print "can't do neither load nor analysis"
    sys.exit(0)

  start_ts = parse_ts(options.start_ts)

  central_cube = g.add_cube("global_coral_ua")
  central_cube.instantiate_on(root_node)
  define_cube(central_cube)

  if ECHO_RESULTS:
    pull_q = jsapi.TimeSubscriber(g, {}, 5000, sort_order="-count", num_results=10)
    pull_q.set_cfg("ts_field", 0)
    pull_q.set_cfg("start_ts", start_ts)
    pull_q.set_cfg("rollup_levels", "8,1")
    pull_q.set_cfg("simulation_rate", 1)
    pull_q.set_cfg("window_offset", 6 * 1000)  # but trailing by a few

    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)
    g.chain([central_cube, pull_q, echo])

  congest_logger = jsapi.AvgCongestLogger(g)
  congest_logger.instantiate_on(root_node)
  congest_logger.set_cfg("field", 3)

  if MULTIROUND:
    tput_merge = jsapi.MultiRoundCoord(g)
    tput_merge.set_cfg("start_ts", start_ts)
    tput_merge.set_cfg("window_offset", 5 * 1000)
    tput_merge.set_cfg("ts_field", 0)
    tput_merge.set_cfg("num_results", 10)
    tput_merge.set_cfg("sort_column", "-count")
    tput_merge.set_cfg("min_window_size", 5)
    # tput_merge.set_cfg("rollup_levels", "8,1")  # roll up time
    tput_merge.instantiate_on(root_node)
    pull_q.set_cfg("window_offset", 10 * 1000)  # but trailing by a few
    g.connect(tput_merge, congest_logger)

  parsed_field_offsets = [coral_fidxs['timestamp'], coral_fidxs['HTTP_stat'],
                          coral_fidxs['URL_requested'], len(coral_types)]

  for node, i in numbered(source_nodes, not LOADING):
    table_prefix = "local_coral_ua"
    table_prefix += "_" + options.warp_factor
    local_cube = g.add_cube(table_prefix + ("_%d" % i))
    define_cube(local_cube, parsed_field_offsets)
    print "cube output dimensions:", local_cube.get_output_dimensions()

    if LOADING:
      f = jsapi.FileRead(g, options.fname, skip_empty=True)
      csvp = jsapi.CSVParse(g, coral_types)
      csvp.set_cfg("discard_off_size", "true")
      round = jsapi.TimeWarp(g, field=1, warp=options.warp_factor)
      if not options.full_url:
        url_to_dom = jsapi.URLToDomain(g, field=coral_fidxs['URL_requested'])
        g.chain([f, csvp, round, url_to_dom, local_cube])
      else:
        g.chain([f, csvp, round, local_cube])
      f.instantiate_on(node)
    else:
      local_cube.set_overwrite(False)

    if MULTIROUND:
      pull_from_local = jsapi.MultiRoundClient(g)
    else:
      query_rate = 1000 if ANALYZE else 3600 * 1000
      pull_from_local = jsapi.VariableCoarseningSubscriber(g, {}, query_rate)
      pull_from_local.set_cfg("simulation_rate", 1)
      pull_from_local.set_cfg("max_window_size", options.max_rollup)
      pull_from_local.set_cfg("ts_field", 0)
      pull_from_local.set_cfg("start_ts", start_ts)
      pull_from_local.set_cfg("window_offset", 2000)  # but trailing by a few

    pull_from_local.instantiate_on(node)
    local_cube.instantiate_on(node)

    lastOp = g.chain([local_cube, pull_from_local, congest_logger])

  g.chain([congest_logger, central_cube])
  return g
def get_graph(source_nodes, root_node, options):
  g = jsapi.QueryGraph()

  ANALYZE = not options.load_only
  LOADING = not options.analyze_only
  ECHO_RESULTS = not options.no_echo
  ONE_LAYER = True

  if not LOADING and not ANALYZE:
    print "can't do neither load nor analysis"
    sys.exit(0)

  start_ts = parse_ts(options.start_ts)

  central_cube = g.add_cube("global_coral_bw")
  central_cube.instantiate_on(root_node)
  if ONE_LAYER:
    define_cube(central_cube)
  else:
    define_cube(central_cube, [0, 2, 1])

  if ECHO_RESULTS:
    pull_q = jsapi.TimeSubscriber(g, {}, 1000)  # every two seconds
    pull_q.set_cfg("ts_field", 0)
    pull_q.set_cfg("start_ts", start_ts)
    # pull_q.set_cfg("rollup_levels", "8,1")
    pull_q.set_cfg("simulation_rate", 1)
    pull_q.set_cfg("window_offset", 4 * 1000)  # but trailing by a few

    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)
    g.chain([central_cube, pull_q, echo])

  congest_logger = jsapi.AvgCongestLogger(g)
  congest_logger.instantiate_on(root_node)
  g.connect(congest_logger, central_cube)

  if not ONE_LAYER:
    n_to_intermediate, intermediates = get_intermediates(source_nodes)
    intermed_cubes = []
    for n, i in zip(intermediates, range(0, len(intermediates))):
      med_cube = g.add_cube("med_coral_bw_%i" % i)
      med_cube.instantiate_on(n)
      med_cube.add_dim("time", CubeSchema.Dimension.TIME_CONTAINMENT, 0)
      med_cube.add_agg("sizes", jsapi.Cube.AggType.COUNT, 2)
      intermed_cubes.append(med_cube)
      connect_to_root(g, med_cube, n, congest_logger, start_ts)

  for node, i in numbered(source_nodes, not LOADING):
    local_cube = g.add_cube("local_coral_bw_%d" % i)
    local_cube.add_dim("time", CubeSchema.Dimension.TIME_CONTAINMENT, coral_fidxs['timestamp'])
    local_cube.add_agg("sizes", jsapi.Cube.AggType.COUNT, coral_fidxs['nbytes'])
    print "cube output dimensions:", local_cube.get_output_dimensions()

    if LOADING:
      f = jsapi.FileRead(g, options.fname, skip_empty=True)
      csvp = jsapi.CSVParse(g, coral_types)
      csvp.set_cfg("discard_off_size", "true")
      round = jsapi.TimeWarp(g, field=1, warp=options.warp_factor)
      g.chain([f, csvp, round, local_cube])
      f.instantiate_on(node)
    else:
      local_cube.set_overwrite(False)

    if ONE_LAYER:
      print node
      my_root = congest_logger
    else:
      print "multi-layer not yet implemented"
      sys.exit(0)
      intermed_id = n_to_intermediate[node]
      my_root = intermed_cubes[intermed_id]

    connect_to_root(g, local_cube, node, my_root, start_ts, ANALYZE)

  return g
def get_graph(source_nodes, root_node, options):
  g = jsapi.QueryGraph()

  ANALYZE = not options.load_only
  LOADING = not options.analyze_only
  ECHO_RESULTS = not options.no_echo

  if not LOADING and not ANALYZE:
    print "can't do neither load nor analysis"
    sys.exit(0)

  start_ts = parse_ts(options.start_ts)

  central_cube = g.add_cube("global_coral_bw")
  central_cube.instantiate_on(root_node)
  define_cube(central_cube)

  if ECHO_RESULTS:
    pull_q = jsapi.TimeSubscriber(g, {}, 1000)  # every two seconds
    pull_q.set_cfg("ts_field", 0)
    pull_q.set_cfg("start_ts", start_ts)
    # pull_q.set_cfg("rollup_levels", "8,1")
    pull_q.set_cfg("simulation_rate", 1)
    pull_q.set_cfg("window_offset", 4 * 1000)  # but trailing by a few

    q_op = jsapi.Quantile(g, 0.95, 1)
    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)
    g.chain([central_cube, pull_q, q_op, echo])

  congest_logger = jsapi.AvgCongestLogger(g)
  congest_logger.instantiate_on(root_node)
  g.connect(congest_logger, central_cube)

  parsed_field_offsets = [coral_fidxs['timestamp'], coral_fidxs['nbytes']]

  for node, i in numbered(source_nodes, not LOADING):
    local_cube = g.add_cube("local_coral_quant_%d" % i)
    define_cube(local_cube, parsed_field_offsets)
    print "cube output dimensions:", local_cube.get_output_dimensions()

    if LOADING:
      f = jsapi.FileRead(g, options.fname, skip_empty=True)
      csvp = jsapi.CSVParse(g, coral_types)
      csvp.set_cfg("discard_off_size", "true")
      round = jsapi.TimeWarp(g, field=1, warp=options.warp_factor)
      to_summary1 = jsapi.ToSummary(g, field=parsed_field_offsets[1], size=100)
      g.chain([f, csvp, round, to_summary1, local_cube])
      f.instantiate_on(node)
    else:
      local_cube.set_overwrite(False)

    query_rate = 1000 if ANALYZE else 3600 * 1000
    pull_from_local = jsapi.TimeSubscriber(g, {}, query_rate)
    pull_from_local.instantiate_on(node)
    pull_from_local.set_cfg("simulation_rate", 1)
    pull_from_local.set_cfg("ts_field", 0)
    pull_from_local.set_cfg("start_ts", start_ts)
    pull_from_local.set_cfg("window_offset", 2000)  # but trailing by a few
    # pull_from_local.set_cfg("rollup_levels", "8,1")
    # pull_from_local.set_cfg("window_size", "5000")

    local_cube.instantiate_on(node)
    g.chain([local_cube, pull_from_local, congest_logger])

  return g