def test_randEval(self):
  qGraph = jsapi.QueryGraph()
  src = jsapi.RandSource(qGraph, 1, 2)
  extend_op = jsapi.ExtendOperator(qGraph, "i", ["a count"])
  eval_op = jsapi.RandEval(qGraph)

  qGraph.connect(src, extend_op)
  qGraph.connect(extend_op, eval_op)

  try:
    qGraph.validate_schemas()
  except SchemaError as ex:
    self.fail("Should not throw, but got: " + str(ex))
def main():
  parser = OptionParser()
  parser.add_option("-C", "--config", dest="config_file",
                    help="read config from FILE", metavar="FILE")
  parser.add_option("-a", "--controller", dest="controller",
                    help="controller address", default="localhost:3456")
  (options, args) = parser.parse_args()

  pattern = ".*" + args[0] + ".*"
  file_to_grep = args[1]

  if ':' in options.controller:
    (serv_addr, serv_port) = options.controller.split(':')
    serv_port = int(serv_port)
  else:
    serv_addr = options.controller
    serv_port = 3456

  ### Define the graph abstractly, without a computation
  g = jsapi.QueryGraph()
  reader = jsapi.FileRead(g, file_to_grep)
  grepper = jsapi.StringGrep(g, pattern)
  host_extend = jsapi.ExtendOperator(g, "s", ["${HOSTNAME}"])

  cube = g.add_cube("local_results")
  cube.add_dim("log_line", Element.STRING, 0)
  cube.add_dim("hostname", Element.STRING, 1)
  cube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)
  cube.set_overwrite(True)  # fresh results

  g.connect(reader, grepper)
  g.connect(grepper, host_extend)
  g.connect(host_extend, cube)

  #### Finished building in memory, now to join
  server = RemoteController()
  server.connect(serv_addr, serv_port)
  n = server.get_a_node()
  assert isinstance(n, NodeID)
  nodes = server.all_nodes()

  cube.instantiate_on(n)
  host_extend.instantiate_on(nodes)

  result_reader = ClientDataReader()
  net_addr = result_reader.prep_to_receive_data()
  g.connect(cube, net_addr)

  server.deploy(g)

  # 'print' is a statement in Python 2 and cannot appear inside a lambda,
  # so use a small named callback instead.
  def print_tuple(tup):
    print tup
  result_reader.blocking_read(print_tuple)
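
# Rough usage sketch (script name and paths are hypothetical): with a
# controller listening on controller-host:3456, deploy a distributed grep with
#
#   python jsgrep.py ERROR /var/log/syslog -a controller-host:3456
#
# Matching lines are tagged with each worker's hostname, counted into the
# "local_results" cube, and streamed back through the ClientDataReader.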
def test_with_partial_placement(self):
  dummyNode1 = ("host", 123)
  dummyNode2 = ("host2", 234)
  planner = QueryPlanner({dummyNode1: dummyNode1, dummyNode2: dummyNode2})
  g = jsapi.QueryGraph()

  evalOp = jsapi.RandEval(g)

  for node, k in zip([dummyNode1, dummyNode2], range(0, 2)):
    src = jsapi.RandSource(g, 1, 2)
    src.set_cfg("rate", 1000)

    localCube = g.add_cube("local_results_%d" % k)
    localCube.add_dim("state", Element.STRING, 0)
    localCube.add_dim("time", Element.TIME, 1)
    localCube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)

    pullOp = jsapi.TimeSubscriber(g, {}, 1000)
    pullOp.set_cfg("ts_field", 1)
    pullOp.set_cfg("window_offset", 1000)  # pull every second, trailing by one

    extendOp = jsapi.ExtendOperator(g, "s", ["node" + str(k)])
    roundOp = jsapi.TRoundOperator(g, fld=1, round_to=5)

    g.connect(src, localCube)
    g.connect(localCube, pullOp)
    g.connect(pullOp, extendOp)
    g.connect(extendOp, roundOp)
    g.connect(roundOp, evalOp)

    nID = NodeID()
    nID.address, nID.portno = node
    src.instantiate_on(nID)

  g.validate_schemas()

  err = planner.take_raw_topo(g.get_deploy_pb().alter[0])
  self.assertEquals(len(err), 0)
  plan = planner.get_assignments(1)

  pb1 = plan[dummyNode1].get_pb().alter[0]
  subscribers = [x for x in pb1.toStart if "Subscriber" in x.op_typename]
  self.assertEquals(len(subscribers), len(pb1.toCreate))
  self.assertEquals(len(pb1.toCreate), 1)
  self.assertGreater(len(pb1.toStart), 3)
  self.assertLessEqual(len(pb1.toStart), 4)
def test_bad_unify(self):
  qGraph = jsapi.QueryGraph()
  src = jsapi.RandSource(qGraph, 1, 2)
  reader = jsapi.FileRead(qGraph, "file name")
  dest = jsapi.ExtendOperator(qGraph, "s", ["a string"])

  qGraph.connect(reader, dest)
  qGraph.connect(src, dest)

  try:
    qGraph.validate_schemas()
  except SchemaError as ex:
    self.assertTrue("match existing schema" in str(ex))
    # print "got expected err:", str(ex)
  else:
    self.fail("should throw, but didn't")
def connect_to_root(g, local_cube, node, root_op, start_ts, ANALYZE=False):
  query_rate = 1000 if ANALYZE else 3600 * 1000
  pull_from_local = jsapi.TimeSubscriber(g, {}, query_rate)
  pull_from_local.instantiate_on(node)
  pull_from_local.set_cfg("simulation_rate", 1)
  pull_from_local.set_cfg("ts_field", 0)
  pull_from_local.set_cfg("start_ts", start_ts)
  pull_from_local.set_cfg("window_offset", 2000)  # but trailing by a few
#  pull_from_local.set_cfg("rollup_levels", "8,1")
#  pull_from_local.set_cfg("window_size", "5000")

  local_cube.instantiate_on(node)
  hostname_extend_op = jsapi.ExtendOperator(g, "s", ["${HOSTNAME}"])
  hostname_extend_op.instantiate_on(node)

  g.chain([local_cube, pull_from_local, hostname_extend_op, root_op])
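
# Usage sketch (illustrative only; union_op and the surrounding names are
# assumptions, not part of this module): build one local cube per worker node
# and wire each toward an aggregation operator pinned at the root, e.g.
#
#   for i, node in enumerate(source_nodes):
#     local_cube = g.add_cube("local_results_%d" % i)
#     define_cube(local_cube)
#     connect_to_root(g, local_cube, node, union_op, start_ts, ANALYZE=True)
#
# With ANALYZE=True the TimeSubscriber pulls once a second; otherwise it
# effectively idles (one pull per hour), which suits load-only runs.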
def test_cubeFilterSubscriber(self):
  qGraph = jsapi.QueryGraph()
  src = jsapi.RandSource(qGraph, 1, 2)

  local_cube = qGraph.add_cube("results")
  local_cube.add_dim("state", Element.STRING, 0)
  local_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)

  filter = jsapi.FilterSubscriber(qGraph, cube_field=2, level_in_field=0)
  # out-schema from filter should be S,T, matching source
  extend_op = jsapi.ExtendOperator(qGraph, "i", ["a count"])
  eval_op = jsapi.RandEval(qGraph)
  qGraph.chain([src, extend_op, local_cube, filter, eval_op])

  reader = jsapi.FileRead(qGraph, "file name")
  csv_parse = jsapi.CSVParse(qGraph, types="I", fields_to_keep="all")
  qGraph.chain([reader, csv_parse, filter])

  try:
    qGraph.validate_schemas()
  except SchemaError as ex:
    self.fail("should not throw, but got " + str(ex))
def src_to_bw(g, data_src, node, options):
  hostname_extend_op = jsapi.ExtendOperator(g, "s", ["${HOSTNAME}"])
  return g.chain([data_src, jsapi.Project(g, 5), jsapi.Project(g, 4),
                  jsapi.Project(g, 1), jsapi.Project(g, 1), hostname_extend_op])
def get_graph(source_nodes, root_node, options):
  g = jsapi.QueryGraph()

  ANALYZE = not options.load_only
  LOADING = not options.analyze_only
  ECHO_RESULTS = not options.no_echo
  MULTIROUND = options.multiround
  HASH_SAMPLE = options.hash_sample
  LOCAL_THRESH = options.local_thresh

  if not LOADING and not ANALYZE:
    print "can't skip both load and analysis"
    sys.exit(0)

  start_ts = parse_ts(options.start_ts)

  central_cube = g.add_cube("global_coral_urls")
  central_cube.instantiate_on(root_node)
  define_cube(central_cube)

  if ECHO_RESULTS:
    pull_q = jsapi.TimeSubscriber(g, {}, 5000, sort_order="-count", num_results=10)
    pull_q.set_cfg("ts_field", 0)
    pull_q.set_cfg("start_ts", start_ts)
    pull_q.set_cfg("rollup_levels", "6,0,1")  # every five seconds to match subscription. Roll up counts.
    pull_q.set_cfg("simulation_rate", 1)
    pull_q.set_cfg("window_offset", 6 * 1000)  # but trailing by a few

    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)

    g.chain([central_cube, pull_q, echo])

  add_latency_measure(g, central_cube, root_node, tti=4, hti=5,
                      latencylog=options.latencylog)

  congest_logger = jsapi.AvgCongestLogger(g)
  congest_logger.instantiate_on(root_node)
  congest_logger.set_cfg("field", 3)

  if MULTIROUND:
    tput_merge = jsapi.MultiRoundCoord(g)
    tput_merge.set_cfg("start_ts", start_ts)
    tput_merge.set_cfg("window_offset", 5 * 1000)
    tput_merge.set_cfg("ts_field", 0)
    tput_merge.set_cfg("num_results", 10)
    tput_merge.set_cfg("sort_column", "-count")
    tput_merge.set_cfg("min_window_size", 5)
    tput_merge.set_cfg("rollup_levels", "10,0,1")  # roll up response codes
    tput_merge.instantiate_on(root_node)
    if ECHO_RESULTS:  # pull_q only exists when echoing results
      pull_q.set_cfg("window_offset", 10 * 1000)  # but trailing by a few

    g.connect(tput_merge, congest_logger)

  parsed_field_offsets = [coral_fidxs['timestamp'], coral_fidxs['HTTP_stat'],
                          coral_fidxs['URL_requested'], len(coral_types)]

  for node, i in numbered(source_nodes, not LOADING):
    if not options.full_url:
      table_prefix = "local_coral_domains"
    else:
      table_prefix = "local_coral_urls"
    table_prefix += "_" + options.warp_factor

    local_cube = g.add_cube(table_prefix + ("_%d" % i))
    define_cube(local_cube, parsed_field_offsets)
    print "cube output dimensions:", local_cube.get_output_dimensions()

    if LOADING:
      f = jsapi.FileRead(g, options.fname, skip_empty=True)
      csvp = jsapi.CSVParse(g, coral_types)
      csvp.set_cfg("discard_off_size", "true")
      round = jsapi.TimeWarp(g, field=1, warp=options.warp_factor)
      if not options.full_url:
        url_to_dom = jsapi.URLToDomain(g, field=coral_fidxs['URL_requested'])
        g.chain([f, csvp, round, url_to_dom, local_cube])
      else:
        g.chain([f, csvp, round, local_cube])
      f.instantiate_on(node)
    else:
      local_cube.set_overwrite(False)

    if MULTIROUND:
      pull_from_local = jsapi.MultiRoundClient(g)
    else:
      query_rate = 1000 if ANALYZE else 3600 * 1000
      pull_from_local = jsapi.VariableCoarseningSubscriber(g, {}, query_rate)
      pull_from_local.set_cfg("simulation_rate", 1)
      pull_from_local.set_cfg("max_window_size", options.max_rollup)
      pull_from_local.set_cfg("ts_field", 0)
      pull_from_local.set_cfg("start_ts", start_ts)
      pull_from_local.set_cfg("window_offset", 2000)  # but trailing by a few
      pull_from_local.set_cfg("sort_order", "-count")

    pull_from_local.instantiate_on(node)
    local_cube.instantiate_on(node)

#    count_logger = jsapi.CountLogger(g, field=3)
    timestamp_op = jsapi.TimestampOperator(g, "ms")
    hostname_extend_op = jsapi.ExtendOperator(g, "s", ["${HOSTNAME}"])  # used as dummy hostname for latency tracker
    hostname_extend_op.instantiate_on(node)

    lastOp = g.chain([local_cube, pull_from_local])
    if HASH_SAMPLE:
      v = jsapi.VariableSampling(g, field=2, type='S')
      v.set_cfg("steps", options.steps)
#      print "connecting ",
      lastOp = g.connect(lastOp, v)
      g.add_policy([pull_from_local, v])
    elif LOCAL_THRESH:
      v = jsapi.WindowLenFilter(g)
      v.set_cfg("err_field", 3)
#      print "connecting ",
      lastOp = g.connect(lastOp, v)
      g.add_policy([pull_from_local, v])

    g.chain([lastOp, timestamp_op, hostname_extend_op])
    # output: 0=>time, 1=>response_code, 2=>url, 3=>count,
    # 4=>timestamp at source, 5=>hostname

    if MULTIROUND:
      g.connect(hostname_extend_op, tput_merge)
    else:
      g.connect(hostname_extend_op, congest_logger)

  timestamp_cube_op = jsapi.TimestampOperator(g, "ms")
  timestamp_cube_op.instantiate_on(root_node)

  g.chain([congest_logger, timestamp_cube_op, central_cube])
  # input to central cube: 0=>time, 1=>response_code, 2=>url, 3=>count,
  # 4=>timestamp at source, 5=>hostname, 6=>timestamp at union

  if options.bw_cap:
    congest_logger.set_inlink_bwcap(float(options.bw_cap))

  return g
def get_graph(source_nodes, root_node, options):
  g = jsapi.QueryGraph()

  ANALYZE = not options.load_only
  LOADING = not options.analyze_only
  ECHO_RESULTS = not options.no_echo

  if not LOADING and not ANALYZE:
    print "can't skip both load and analysis"
    sys.exit(0)

  start_ts = parse_ts(options.start_ts)

  central_cube = g.add_cube("global_coral_quant")
  central_cube.instantiate_on(root_node)
  define_cube(central_cube)

  if ECHO_RESULTS:
    pull_q = jsapi.TimeSubscriber(g, {}, 1000)  # pull every second
    pull_q.set_cfg("ts_field", 0)
    pull_q.set_cfg("latency_ts_field", 7)
    pull_q.set_cfg("start_ts", start_ts)
    pull_q.set_cfg("rollup_levels", "8,1")
    pull_q.set_cfg("simulation_rate", 1)
    pull_q.set_cfg("window_offset", 6 * 1000)  # but trailing by a few

    count_op = jsapi.SummaryToCount(g, 2)
    q_op = jsapi.Quantile(g, 0.95, 3)
    q_op2 = jsapi.Quantile(g, 0.95, 2)
    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)

    g.chain([central_cube, pull_q, count_op, q_op, q_op2, echo])

  latency_measure_op = jsapi.LatencyMeasureSubscriber(g, time_tuple_index=4,
                                                      hostname_tuple_index=5,
                                                      interval_ms=100)
  echo_op = jsapi.Echo(g)
  echo_op.set_cfg("file_out", options.latencylog)
  echo_op.instantiate_on(root_node)
  g.chain([central_cube, latency_measure_op, echo_op])

  parsed_field_offsets = [coral_fidxs['timestamp'], coral_fidxs['HTTP_stat'],
                          coral_fidxs['nbytes'], coral_fidxs['dl_utime'],
                          len(coral_types)]

  for node, i in numbered(source_nodes, not LOADING):
    local_cube = g.add_cube("local_coral_quant_%d" % i)
    define_cube(local_cube, parsed_field_offsets)
    print "cube output dimensions:", local_cube.get_output_dimensions()

    if LOADING:
      f = jsapi.FileRead(g, options.fname, skip_empty=True)
      csvp = jsapi.CSVParse(g, coral_types)
      csvp.set_cfg("discard_off_size", "true")
      round = jsapi.TimeWarp(g, field=1, warp=options.warp_factor)
      to_summary1 = jsapi.ToSummary(g, field=parsed_field_offsets[2], size=5000)
      to_summary2 = jsapi.ToSummary(g, field=parsed_field_offsets[3], size=5000)
      g.chain([f, csvp, round, to_summary1, to_summary2, local_cube])
      f.instantiate_on(node)
    else:
      local_cube.set_overwrite(False)

    query_rate = 1000 if ANALYZE else 3600 * 1000
    if options.no_backoff:
      pull_from_local = jsapi.TimeSubscriber(g, {}, query_rate)
    else:
      pull_from_local = jsapi.VariableCoarseningSubscriber(g, {}, query_rate)

    pull_from_local.instantiate_on(node)
    pull_from_local.set_cfg("simulation_rate", 1)
    pull_from_local.set_cfg("ts_field", 0)
    pull_from_local.set_cfg("start_ts", start_ts)
    pull_from_local.set_cfg("window_offset", 2000)  # but trailing by a few
#    pull_from_local.set_cfg("rollup_levels", "8,1")
#    pull_from_local.set_cfg("window_size", "5000")

    local_cube.instantiate_on(node)

    count_logger = jsapi.CountLogger(g, field=4)
    timestamp_op = jsapi.TimestampOperator(g, "ms")
    count_extend_op = jsapi.ExtendOperator(g, "i", ["1"])  # why is this here? -asr
    count_extend_op.instantiate_on(node)  # TODO should get a real hostname here

    timestamp_cube_op = jsapi.TimestampOperator(g, "ms")
    timestamp_cube_op.instantiate_on(root_node)

    g.chain([local_cube, pull_from_local, count_logger, timestamp_op,
             count_extend_op, timestamp_cube_op, central_cube])
    if options.bw_cap:
      timestamp_cube_op.set_inlink_bwcap(float(options.bw_cap))

#  g.chain([local_cube, pull_from_local, count_op, q_op, q_op2, echo])

  return g
def test_line_graph_with_subscriber(self):
  dummyNode1 = ("host", 123)
  dummyNode2 = ("host2", 234)
  dummyNode3 = ("host3", 345)
  planner = QueryPlanner({dummyNode1: dummyNode1, dummyNode2: dummyNode2,
                          dummyNode3: dummyNode3})
  g = jsapi.QueryGraph()

  src = jsapi.RandSource(g, 1, 2)
  src.set_cfg("rate", 1000)

  localCube = g.add_cube("local_results")
  localCube.add_dim("state", Element.STRING, 0)
  localCube.add_dim("time", Element.TIME, 1)
  localCube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)

  pullOp = jsapi.TimeSubscriber(g, {}, 1000)
  pullOp.set_cfg("ts_field", 1)
  pullOp.set_cfg("window_offset", 1000)  # pull every second, trailing by one

  remoteCube = g.add_cube("remote_results")
  remoteCube.add_dim("state", Element.STRING, 0)
  remoteCube.add_dim("time", Element.TIME, 1)
  remoteCube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)

  extendOp = jsapi.ExtendOperator(g, "s", ["node1"])
  roundOp = jsapi.TRoundOperator(g, fld=1, round_to=5)

  # The line graph topology is: src -> cube -> subscriber -> operator(s) -> cube.
  g.connect(src, localCube)
  g.connect(localCube, pullOp)
  g.connect(pullOp, extendOp)
  g.connect(extendOp, roundOp)
  g.connect(roundOp, remoteCube)

  node1ID = NodeID()
  node1ID.address, node1ID.portno = dummyNode1
  node2ID = NodeID()
  node2ID.address, node2ID.portno = dummyNode2
  node3ID = NodeID()
  node3ID.address, node3ID.portno = dummyNode3

  g.validate_schemas()

  # Pin nothing: everything should be placed on one node
  err = planner.take_raw_topo(g.get_deploy_pb().alter[0])
  self.assertEquals(len(err), 0)
  plan = planner.get_assignments(1)
  self.assertEquals(len(plan), 1)

  # Pin the source (src): everything should be placed on the source node
  src.instantiate_on(node2ID)
  err = planner.take_raw_topo(g.get_deploy_pb().alter[0])
  self.assertEquals(len(err), 0)
  plan = planner.get_assignments(1)
  self.assertEquals(len(plan), 1)
  self.assertTrue(dummyNode2 in plan)

  # Pin the source (src) and sink (remoteCube): everything except the sink
  # should be on the source node
  src.instantiate_on(node2ID)
  remoteCube.instantiate_on(node1ID)
  err = planner.take_raw_topo(g.get_deploy_pb().alter[0])
  self.assertEquals(len(err), 0)
  plan = planner.get_assignments(1)
  self.assertEquals(len(plan), 2)
  node1Plan = plan[dummyNode1]
  node2Plan = plan[dummyNode2]
  self.assertEquals(len(node1Plan.cubes), 1)
  self.assertTrue(node1Plan.cubes[0].name.endswith(remoteCube.name))
  self.assertEquals(len(node1Plan.operators), 0)
  self.assertEquals(len(node2Plan.cubes), 1)
  self.assertTrue(node2Plan.cubes[0].name.endswith(localCube.name))
  self.assertEquals(len(node2Plan.operators), 4)

  # Pin the source (src), source cube (localCube), and sink (remoteCube):
  # regardless of where the source and sink are placed, everything from the
  # source cube up to (but excluding) the sink should be on the same node
  src.instantiate_on(node2ID)
  localCube.instantiate_on(node3ID)
  remoteCube.instantiate_on(node1ID)
  err = planner.take_raw_topo(g.get_deploy_pb().alter[0])
  self.assertEquals(len(err), 0)
  plan = planner.get_assignments(1)
  self.assertEquals(len(plan), 3)
  node3Plan = plan[dummyNode3]
  self.assertEquals(len(node3Plan.cubes), 1)
  self.assertTrue(node3Plan.cubes[0].name.endswith(localCube.name))
  self.assertEquals(len(node3Plan.operators), 3)
  # In particular, the cube subscriber should be on the same node as the cube!
  pb3 = node3Plan.get_pb().alter[0]
  subscribers = [x for x in pb3.toStart if "Subscriber" in x.op_typename]
  self.assertEquals(len(subscribers), 1)
def get_graph(source_nodes, root_node, options):
  g = jsapi.QueryGraph()

  congest_logger = jsapi.AvgCongestLogger(g)
  congest_logger.instantiate_on(root_node)
  congest_logger.set_cfg("hist_field", 2)
  congest_logger.set_cfg("report_interval", 10 * 1000)

#  timestamp_cube_op = jsapi.TimestampOperator(g, "ms")
#  timestamp_cube_op.instantiate_on(root_node)

  if not options.no_cube:
    central_cube = g.add_cube("global_hists")
    central_cube.set_overwrite(True)
    central_cube.instantiate_on(root_node)
    central_cube.add_dim("time", CubeSchema.Dimension.TIME_CONTAINMENT, 0)
    central_cube.add_dim("randvar_for_degrade", Element.INT32, 1)
    central_cube.add_agg("the_hist", jsapi.Cube.AggType.HISTO, 2)
    if store_hostnames:
      central_cube.add_dim("hostname", Element.STRING, 4)

    g.chain([congest_logger, central_cube])

    add_latency_measure(g, central_cube, root_node, tti=3, hti=4,
                        latencylog=options.latencylog,
                        interval=options.latency_interval_ms)
  else:
    print "No Cube"
#    ground = jsapi.Ground(g)
#    ground.instantiate_on(root_node)
#    g.chain([congest_logger, ground])
    add_latency_measure(g, congest_logger, root_node, tti=3, hti=4,
                        latencylog=options.latencylog,
                        interval=options.latency_interval_ms)

  if options.rate:
    options.schedule_increment = 0
    options.schedule_start = options.rate

  for node, i in numbered(source_nodes):
    sender = jsapi.RandHist(g)
    sender.set_cfg("schedule_start", options.schedule_start)
    sender.set_cfg("schedule_wait", options.schedule_wait)
    sender.set_cfg("schedule_increment", options.schedule_increment)
    sender.set_cfg("schedule_max", options.schedule_max)
    sender.set_cfg("unique_vals", options.unique_vals)
    sender.set_cfg("hist_size", options.hist_size)
    sender.set_cfg("window_fudge_factor", options.window_fudge_factor)
    sender.set_cfg("wait_per_batch", 4000)
    sender.set_cfg("batches_per_window", 1)
    if options.degrade_at_source:
      sender.set_cfg("levels", options.degradation_step_count)
    sender.instantiate_on(node)

    if not options.no_degrade and not options.degrade_at_source:
      if options.sample:
        degrade = jsapi.VariableSampling(g, field=1, type='I')
        degrade.set_cfg("debug_stage", options.sample_debug_stage)
        degrade.instantiate_on(node)
      else:
        degrade = jsapi.DegradeSummary(g, 2)
        degrade.set_cfg("step_count", options.degradation_step_count)
        degrade.set_cfg("min_ratio", options.degradation_min_ratio)
        degrade.instantiate_on(node)

    timestamp_op = jsapi.TimestampOperator(g, "ms")
    hostname_extend_op = jsapi.ExtendOperator(g, "s", ["${HOSTNAME}"])
    hostname_extend_op.instantiate_on(node)

    if not options.no_degrade and not options.degrade_at_source:
      g.chain([sender, degrade, timestamp_op, hostname_extend_op, congest_logger])
    else:
      g.chain([sender, timestamp_op, hostname_extend_op, congest_logger])

  return g