def test_2node(self):
  dummyNode, dummy2 = ("host", 123), ("host2", 123)
  planner = QueryPlanner({dummyNode: dummyNode, dummy2: dummy2})
  g = jsapi.QueryGraph()

  ucube = g.add_cube("union")
  ucube.add_dim("state", Element.STRING, 0)
  ucube.add_agg("count", jsapi.Cube.AggType.COUNT, 1)

  readers = []
  for node in [dummyNode, dummy2]:
    reader = jsapi.FileRead(g, "file name")
    readers.append(reader)
    nID = NodeID()
    nID.address, nID.portno = node
    reader.instantiate_on(nID)

  # g.connect(reader, ucube)  # agg tree test, so we don't need this
  g.agg_tree(readers, ucube)

  self.assertEquals(6, len(g.edges))  # 2x (op --> cube --> subscriber --> union)

  req = ControlMessage()
  req.type = ControlMessage.ALTER
  g.add_to_PB(req.alter.add())
  err = planner.take_raw_topo(req.alter[0]).lower()
  self.assertEquals(len(err), 0)
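# Illustration (an assumption inferred from the edge-count comment in
# test_2node above, not part of the original suite): agg_tree() appears to
# interpose a per-source cube and subscriber between each reader and the
# union cube, so every source contributes three edges
# (op --> cube --> subscriber --> union).
def expected_agg_tree_edges(num_sources):
  return 3 * num_sources  # two sources --> the six edges asserted above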
def parse_setup():
  (serv_addr, serv_port), file_to_parse = js_client_config.arg_config()

  k2 = 20  # how many to pull to top level
  k = 10   # how many to display

  # specify the query fields that this computation is interested in
  # which_coral_fields = [coral_fidxs['URL_requested']]
  agg_field_idx = coral_fidxs['URL_requested']

  g = jsapi.QueryGraph()
  f = jsapi.FileRead(g, file_to_parse, skip_empty=True)
  csvp = jsapi.CSVParse(g, coral_types)
  grab_domain = jsapi.GenericParse(g, DOMAIN_CAPTURE,
                                   coral_types[agg_field_idx],
                                   field_to_parse=agg_field_idx,
                                   keep_unparsed=False)
  pull_k2 = jsapi.TimeSubscriber(g, {}, 2000, "-count", k2)

  local_cube = g.add_cube("coral_results")
  local_cube.add_dim("Requested_domains", Element.STRING, 0)
  # index past end of tuple is a magic API to the "count" aggregate that tells
  # it to assume a count of 1
  local_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 1)
  local_cube.set_overwrite(True)  # fresh results

  g.chain([f, csvp, grab_domain, local_cube, pull_k2])

  cr = ClientDataReader(raw_data=True)
  g.connectExternal(pull_k2, cr.prep_to_receive_data())
  remote_deploy(serv_addr, serv_port, g, cube=local_cube)
  return cr
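# Hypothetical usage sketch (not part of the original client): the
# ClientDataReader returned by parse_setup() can be drained with
# blocking_read(), as the grep client elsewhere in this collection does,
# passing a callback that is invoked once per received tuple.
def collect_results():
  cr = parse_setup()
  received = []
  cr.blocking_read(received.append)
  return received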
def test_2op_plan(self):
  """This test creates an operator and a cube, attached."""
  dummyNode = ("host", 123)
  planner = QueryPlanner({dummyNode: dummyNode})

  qGraph = jsapi.QueryGraph()
  reader = jsapi.FileRead(qGraph, "file name")
  cube = qGraph.add_cube("local_results")
  cube.add_dim("hostname", Element.STRING, 0)
  cube.add_agg("count", jsapi.Cube.AggType.COUNT, 1)
  cube.set_overwrite(True)  # fresh results
  qGraph.connect(reader, cube)

  req = ControlMessage()
  req.type = ControlMessage.ALTER
  qGraph.add_to_PB(req.alter.add())
  err = planner.take_raw_topo(req.alter[0]).lower()
  if len(err) > 0:
    print "Test yielded unexpected error:", err
  self.assertEquals(len(err), 0)

  plan = planner.get_assignments(1)
  self.assertTrue(dummyNode in plan)
  self.assertEquals(len(plan), 1)
  self.assertEquals(len(plan[dummyNode].operators), 1)
  self.assertEquals(len(plan[dummyNode].cubes), 1)

  pbToNode = plan[dummyNode].get_pb()
  self.assertEquals(len(pbToNode.alter[0].edges), 1)
def test_cubeSubscribe(self):
  qGraph = jsapi.QueryGraph()
  local_cube = qGraph.add_cube("results")
  local_cube.add_dim("state", Element.STRING, 0)
  local_cube.add_dim("time", Element.TIME, 1)
  local_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)

  sub = jsapi.TimeSubscriber(qGraph, {}, 1000, "-count")  # pull every second
  eval_op = jsapi.RandEval(qGraph)
  qGraph.connect(local_cube, sub)
  qGraph.connect(sub, eval_op)

  try:
    qGraph.validate_schemas()
  except SchemaError as ex:
    self.fail("should not throw, but got " + str(ex))

  sub2 = jsapi.TimeSubscriber(qGraph, {}, 1000, "-count")  # pull every second
  rounder = jsapi.TRoundOperator(qGraph, 0, 2)
  qGraph.connect(sub2, rounder)
  qGraph.connect(local_cube, sub2)
  # self.assertTrue(1 not in qGraph.operators)

  try:
    qGraph.validate_schemas()
  except SchemaError as ex:
    self.assertTrue("requires that field 0 be a time" in str(ex))
    print "got expected err:", str(ex)
  else:
    self.fail("should throw, but didn't")
def test_cubeInsert(self):
  qGraph = jsapi.QueryGraph()
  local_cube = qGraph.add_cube("results")
  local_cube.add_dim("state", Element.STRING, 0)
  local_cube.add_dim("time", Element.TIME, 1)
  local_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)

  src = jsapi.RandSource(qGraph, 1, 2)
  qGraph.connect(src, local_cube)
  try:
    qGraph.validate_schemas()
  except SchemaError as ex:
    self.fail("should not throw, but got " + str(ex))

  qGraph.remove(src)

  # add a mismatched edge: string versus string,time
  reader = jsapi.FileRead(qGraph, "file name")
  qGraph.connect(reader, local_cube)
  e_map = qGraph.forward_edge_map()
  self.assertEquals(len(e_map), 1)

  try:
    qGraph.validate_schemas()
  except SchemaError as ex:
    print "got expected err:", str(ex)
  else:
    self.fail("should throw, but didn't")
def test_serializePolicy(self):
  qGraph = jsapi.QueryGraph()
  local_cube = qGraph.add_cube("results")
  local_cube.add_dim("state", Element.STRING, 0)
  local_cube.add_dim("time", Element.TIME, 1)
  local_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)

  src = jsapi.RandSource(qGraph, 1, 2)
  sub = jsapi.TimeSubscriber(qGraph, {}, 1000, "-count")  # pull every second
  sample = jsapi.VariableSampling(qGraph)
  eval_op = jsapi.RandEval(qGraph)
  qGraph.chain([src, local_cube, sub, sample, eval_op])
  qGraph.add_policy([sub, sample])

  try:
    pb = qGraph.get_deploy_pb()
    self.assertEquals(len(pb.alter[0].congest_policies), 1)
    oid = pb.alter[0].congest_policies[0].op[0].task
    self.assertEquals(oid, sub.id)
    # print str(pb.alter)
  except SchemaError as ex:
    self.fail("should not throw, but got " + str(ex))
def test_external_edge_plan(self):
  qGraph = jsapi.QueryGraph()
  reader = jsapi.FileRead(qGraph, "file name")

  req = ControlMessage()
  req.type = ControlMessage.ALTER
  qGraph.add_to_PB(req.alter.add())

  MY_PORTNO = 1000
  e = req.alter[0].edges.add()
  e.src = req.alter[0].toStart[0].id.task
  e.computation = 0
  e.dest_addr.address = "myhost"
  e.dest_addr.portno = MY_PORTNO

  dummyNode = ("host", 123)
  planner = QueryPlanner({dummyNode: dummyNode})
  err = planner.take_raw_topo(req.alter[0]).lower()
  self.assertEquals(len(err), 0)

  plan = planner.get_assignments(1)
  self.assertTrue(dummyNode in plan)
  self.assertEquals(len(plan), 1)
  self.assertEquals(len(plan[dummyNode].operators), 1)

  pbToNode = plan[dummyNode].get_pb()
  self.assertEquals(len(pbToNode.alter[0].edges), 1)
  self.assertEquals(pbToNode.alter[0].edges[0].dest_addr.portno, MY_PORTNO)
def main():
  parser = standard_option_parser()
  (options, args) = parser.parse_args()
  all_nodes, server = get_all_nodes(options)
  root_node = find_root_node(options, all_nodes)
  print "%d worker nodes in system" % len(all_nodes)

  g = jsapi.QueryGraph()
  collector = jsapi.ImageQuality(g)
  collector.instantiate_on(root_node)

  if len(all_nodes) < 1 or (len(all_nodes) == 1 and options.generate_at_union):
    print "FAIL: not enough nodes"
    sys.exit(0)

  for node in all_nodes:
    if node == root_node and not options.generate_at_union:
      continue
    reader = jsapi.VideoSource(g, "/tmp/jetstream/mot.profile.csv",
                               "/tmp/jetstream/mot.source.csv", 1500)
    timestamp = jsapi.TimestampOperator(g, "ms")
    reader.instantiate_on(node)
    g.chain([reader, timestamp, collector])

  print "deploying"
  deploy_or_dummy(options, server, g)
def get_graph(node, options):
  g = jsapi.QueryGraph()

  # we don't use this here
  # start_ts = parse_ts(options.start_ts)

  parsed_field_offsets = [coral_fidxs['timestamp'], coral_fidxs['HTTP_stat'],
                          # coral_fidxs['Referrer_URL'],
                          coral_fidxs['URL_requested'], coral_fidxs['nbytes'],
                          coral_fidxs['dl_utime'], len(coral_types)]

  f = jsapi.FileRead(g, options.fname, skip_empty=True)
  csvp = jsapi.CSVParse(g, coral_types)
  csvp.set_cfg("discard_off_size", "true")
  round = jsapi.TimeWarp(g, field=1, warp=options.warp_factor)
  round.set_cfg("wait_for_catch_up", "false")
  f.instantiate_on(node)

  local_raw_cube = define_raw_cube(g, options.cube_name, node,
                                   parsed_field_offsets, True)

  if not options.full_url:
    url_to_dom = jsapi.URLToDomain(g, field=coral_fidxs['URL_requested'])
    g.chain([f, csvp, round, url_to_dom, local_raw_cube])
  else:
    g.chain([f, csvp, round, local_raw_cube])
  return g
def test_CSVParse_validate(self):
  qGraph = jsapi.QueryGraph()
  reader = jsapi.FileRead(qGraph, "file name")
  csvprs = jsapi.CSVParse(qGraph, "ISDDDIIDSISIISD")
  qGraph.connect(reader, csvprs)
  try:
    qGraph.validate_schemas()
  except SchemaError as ex:
    self.fail("Should not throw, but got: " + str(ex))
def main():
  parser = standard_option_parser()
  parser.add_option("--rate", dest="img_per_sec", default="2",
                    help="number of images to send per second")
  parser.add_option("--dir", dest="dirname", default="sample_images",
                    help="where to read from")
  parser.add_option("--prefix", dest="prefix", default="l",
                    help="prefix for images")
  parser.add_option("--degradation", dest="deg", default="interval",
                    help="which degradation to use; can be hash, interval")
  (options, args) = parser.parse_args()

  all_nodes, server = get_all_nodes(options)
  root_node = find_root_node(options, all_nodes)
  print "%d worker nodes in system" % len(all_nodes)

  g = jsapi.QueryGraph()
  files_per_window = float(options.img_per_sec) * window_len_sec

  collector = jsapi.ImageQuality(g)
  collector.instantiate_on(root_node)

  if len(all_nodes) < 1 or (len(all_nodes) == 1 and options.generate_at_union):
    print "FAIL: not enough nodes"
    sys.exit(0)

  if options.deg == INTERVAL:
    print "Using interval sampling (coarse-grained)"
  elif options.deg == HASH:
    print "Using hash sampling (fine-grained)"
  elif options.deg == NONE:
    print "No degradation"
  else:
    print "unknown degradation %s. Aborting" % options.deg
    sys.exit(0)

  for node in all_nodes:
    if node == root_node and not options.generate_at_union:
      continue
    reader = jsapi.BlobReader(g, dirname=options.dirname, prefix=options.prefix,
                              files_per_window=files_per_window,
                              ms_per_window=1000 * window_len_sec)
    if options.deg == INTERVAL:
      filter = jsapi.IntervalSampling(g, max_interval=4)
    elif options.deg == HASH:
      filter = jsapi.VariableSampling(g, field=0, type='I')
      filter.set_cfg("steps", "20")
    timestamp = jsapi.TimestampOperator(g, "ms")
    reader.instantiate_on(node)

    if options.deg == NONE:
      g.chain([reader, timestamp, collector])
    else:
      g.chain([reader, filter, timestamp, collector])

  print "deploying"
  deploy_or_dummy(options, server, g)
def main():
  parser = OptionParser()
  parser.add_option("-C", "--config", dest="config_file",
                    help="read config from FILE", metavar="FILE")
  parser.add_option("-a", "--controller", dest="controller",
                    help="controller address", default="localhost:3456")
  (options, args) = parser.parse_args()

  serv_addr, serv_port = normalize_controller_addr(options.controller)
  file_to_parse = args[0]

  k2 = 20  # how many to pull to top level
  k = 10   # how many to display

  ### Define the graph abstractly, without a computation
  g = jsapi.QueryGraph()
  reader = jsapi.FileRead(g, file_to_parse)
  parse = jsapi.GenericParse(g, ".*GET ([^ ]*) .*", "s")

  local_cube = g.add_cube("local_results")
  local_cube.add_dim("url", Element.STRING, 0)
  # cube.add_dim("hostname", Element.STRING, 1)
  local_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 1)
  local_cube.set_overwrite(True)  # fresh results

  pull_k2 = jsapi.TimeSubscriber(g, {}, 2000, "-count", k2)
  echo = jsapi.Echo(g)
  # local_cube = jsapi.Echo(g)

  g.connect(reader, parse)
  g.connect(parse, local_cube)
  g.connect(local_cube, pull_k2)
  g.connect(pull_k2, echo)
  # Should do a pull into a consolidated cube

  #### Finished building in memory, now to join
  server = RemoteController((serv_addr, serv_port))
  n = server.get_a_node()
  assert isinstance(n, NodeID)
  all_nodes = server.all_nodes()

  local_cube.instantiate_on(all_nodes)
  server.deploy(g)
def test_file_and_counter(self):
  qGraph = jsapi.QueryGraph()
  reader = jsapi.FileRead(qGraph, "file name")
  counter = jsapi.RateRecord(qGraph)
  qGraph.connect(reader, counter)
  try:
    qGraph.validate_schemas()
  except SchemaError as ex:
    self.fail("Should not throw, but got: " + str(ex))
def test_randEval(self):
  qGraph = jsapi.QueryGraph()
  src = jsapi.RandSource(qGraph, 1, 2)
  extend = jsapi.ExtendOperator(qGraph, "i", ["a count"])
  eval_op = jsapi.RandEval(qGraph)
  qGraph.connect(src, extend)
  qGraph.connect(extend, eval_op)
  try:
    qGraph.validate_schemas()
  except SchemaError as ex:
    self.fail("Should not throw, but got: " + str(ex))
def main():
  parser = OptionParser()
  parser.add_option("-C", "--config", dest="config_file",
                    help="read config from FILE", metavar="FILE")
  parser.add_option("-a", "--controller", dest="controller",
                    help="controller address", default="localhost:3456")
  (options, args) = parser.parse_args()

  pattern = ".*" + args[0] + ".*"
  file_to_grep = args[1]

  if ':' in options.controller:
    (serv_addr, serv_port) = options.controller.split(':')
    serv_port = int(serv_port)
  else:
    serv_addr = options.controller
    serv_port = 3456

  ### Define the graph abstractly, without a computation
  g = jsapi.QueryGraph()
  reader = jsapi.FileRead(g, file_to_grep)
  grepper = jsapi.StringGrep(g, pattern)
  host_extend = jsapi.ExtendOperator(g, "s", ["${HOSTNAME}"])

  cube = g.add_cube("local_results")
  cube.add_dim("log_line", Element.STRING, 0)
  cube.add_dim("hostname", Element.STRING, 1)
  cube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)
  cube.set_overwrite(True)  # fresh results

  g.connect(reader, grepper)
  g.connect(grepper, host_extend)
  g.connect(host_extend, cube)

  #### Finished building in memory, now to join
  server = RemoteController()
  server.connect(serv_addr, serv_port)
  n = server.get_a_node()
  assert isinstance(n, NodeID)
  nodes = server.all_nodes()

  cube.instantiate_on(n)
  host_extend.instantiate_on(nodes)

  result_reader = ClientDataReader()
  net_addr = result_reader.prep_to_receive_data()
  g.connect(cube, net_addr)

  server.deploy(g)

  # print is a statement in Python 2, so it can't appear inside a lambda;
  # use a named callback instead
  def print_tuple(t):
    print t
  result_reader.blocking_read(print_tuple)
def test_cubeInsertPartial(self):
  # tests insertion where the tuple has more fields than the cube
  print "inserting tuple with more data than cube"
  qGraph = jsapi.QueryGraph()
  local_cube = qGraph.add_cube("results")
  local_cube.add_dim("state", Element.STRING, 0)
  src = jsapi.RandSource(qGraph, 1, 2)
  qGraph.connect(src, local_cube)
  try:
    qGraph.validate_schemas()
  except SchemaError as ex:
    self.fail("should not throw, but got " + str(ex))
def test_bad_edge(self):
  qGraph = jsapi.QueryGraph()
  reader = jsapi.FileRead(qGraph, "file name")
  rounder = jsapi.TRoundOperator(qGraph, 2, 2)
  qGraph.connect(reader, rounder)
  try:
    qGraph.validate_schemas()
  except SchemaError as ex:
    self.assertTrue("can't round field 2" in str(ex))
  else:
    self.fail("should throw, but didn't")
def main():
  parser = standard_option_parser()
  (options, args) = parser.parse_args()
  all_nodes, server = get_all_nodes(options)
  root_node = find_root_node(options, all_nodes)
  source_nodes = get_source_nodes(options, all_nodes, root_node)

  g = jsapi.QueryGraph()
  start_ts = parse_ts(options.start_ts)

  central_cube = define_raw_cube(g, "global_coral_urls", root_node, overwrite=True)

  if not options.no_echo:
    pull_q = jsapi.TimeSubscriber(g, {}, 30000, sort_order="-count", num_results=10)
    pull_q.set_cfg("ts_field", 0)
    pull_q.set_cfg("start_ts", start_ts)
    # every five seconds to match subscription; roll up counts
    pull_q.set_cfg("rollup_levels", "6,0,1")
    pull_q.set_cfg("simulation_rate", 1)
    pull_q.set_cfg("window_offset", 6 * 1000)  # but trailing by a few
    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)
    g.chain([central_cube, pull_q, echo])

  tput_merge = jsapi.MultiRoundCoord(g)
  tput_merge.set_cfg("start_ts", start_ts)
  tput_merge.set_cfg("window_offset", 5 * 1000)
  tput_merge.set_cfg("ts_field", 0)
  tput_merge.set_cfg("num_results", 10)
  tput_merge.set_cfg("sort_column", "-count")
  tput_merge.set_cfg("min_window_size", 5)
  tput_merge.set_cfg("rollup_levels", "10,0,1")  # roll up response code and referer
  tput_merge.instantiate_on(root_node)
  g.chain([tput_merge, central_cube])

  for node in source_nodes:
    local_cube = define_raw_cube(g, "local_records", node, overwrite=False)
    # print "cube output dimensions:", local_cube.get_output_dimensions()
    pull_from_local = jsapi.MultiRoundClient(g)
    pull_from_local.instantiate_on(node)
    lastOp = g.chain([local_cube, pull_from_local, tput_merge])

  deploy_or_dummy(options, server, g)
def test_with_partial_placement(self):
  dummyNode1 = ("host", 123)
  dummyNode2 = ("host2", 234)
  planner = QueryPlanner({dummyNode1: dummyNode1, dummyNode2: dummyNode2})

  g = jsapi.QueryGraph()
  evalOp = jsapi.RandEval(g)

  for node, k in zip([dummyNode1, dummyNode2], range(0, 2)):
    src = jsapi.RandSource(g, 1, 2)
    src.set_cfg("rate", 1000)

    localCube = g.add_cube("local_results_%d" % k)
    localCube.add_dim("state", Element.STRING, 0)
    localCube.add_dim("time", Element.TIME, 1)
    localCube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)

    pullOp = jsapi.TimeSubscriber(g, {}, 1000)
    pullOp.set_cfg("ts_field", 1)
    pullOp.set_cfg("window_offset", 1000)  # pull every second, trailing by one

    extendOp = jsapi.ExtendOperator(g, "s", ["node" + str(k)])
    roundOp = jsapi.TRoundOperator(g, fld=1, round_to=5)
    g.connect(src, localCube)
    g.connect(localCube, pullOp)
    g.connect(pullOp, extendOp)
    g.connect(extendOp, roundOp)
    g.connect(roundOp, evalOp)

    nID = NodeID()
    nID.address, nID.portno = node
    src.instantiate_on(nID)

  g.validate_schemas()

  err = planner.take_raw_topo(g.get_deploy_pb().alter[0])
  self.assertEquals(len(err), 0)
  plan = planner.get_assignments(1)

  pb1 = plan[dummyNode1].get_pb().alter[0]
  subscribers = [x for x in pb1.toStart if "Subscriber" in x.op_typename]
  self.assertEquals(len(subscribers), len(pb1.toCreate))
  self.assertEquals(len(pb1.toCreate), 1)
  self.assertGreater(len(pb1.toStart), 3)
  self.assertLessEqual(len(pb1.toStart), 4)
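# Hypothetical helper (not in the original tests): test_2node and
# test_with_partial_placement both repeat the same three-line pattern to turn
# a (host, port) pair into a NodeID for instantiate_on(); it could be
# factored out like so.
def make_node_id(addr_pair):
  nID = NodeID()
  nID.address, nID.portno = addr_pair
  return nID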
def test_bad_unify(self):
  qGraph = jsapi.QueryGraph()
  src = jsapi.RandSource(qGraph, 1, 2)
  reader = jsapi.FileRead(qGraph, "file name")
  dest = jsapi.ExtendOperator(qGraph, "s", ["a string"])
  qGraph.connect(reader, dest)
  qGraph.connect(src, dest)
  try:
    qGraph.validate_schemas()
  except SchemaError as ex:
    self.assertTrue("match existing schema" in str(ex))
    # print "got expected err:", str(ex)
  else:
    self.fail("should throw, but didn't")
def get_graph(source_nodes, root_node, options):
  ECHO_RESULTS = not options.no_echo
  g = jsapi.QueryGraph()
  BOUND = 100
  start_ts = parse_ts(options.start_ts)

  parsed_field_offsets = [coral_fidxs['timestamp'], coral_fidxs['HTTP_stat'],
                          coral_fidxs['URL_requested'], coral_fidxs['nbytes'],
                          coral_fidxs['dl_utime'], len(coral_fidxs)]

  global_results = g.add_cube("global_slow")
  define_schema_for_raw_cube(global_results, parsed_field_offsets)
  global_results.instantiate_on(root_node)

  congest_logger = jsapi.AvgCongestLogger(g)
  congest_logger.instantiate_on(root_node)
  g.connect(congest_logger, global_results)

  if ECHO_RESULTS:
    pull_q = jsapi.TimeSubscriber(g, {}, 1000)
    pull_q.set_cfg("ts_field", 0)
    pull_q.set_cfg("start_ts", start_ts)
    # pull_q.set_cfg("rollup_levels", "8,1")
    # pull_q.set_cfg("simulation_rate", 1)
    pull_q.set_cfg("window_offset", 6 * 1000)  # but trailing by a few

    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)
    g.chain([global_results, pull_q, echo])

  for node, i in numbered(source_nodes, False):
    f = jsapi.FileRead(g, options.fname, skip_empty=True)
    csvp = jsapi.CSVParse(g, coral_types)
    csvp.set_cfg("discard_off_size", "true")
    round = jsapi.TimeWarp(g, field=1, warp=options.warp_factor)
    round.set_cfg("wait_for_catch_up", "true")
    f.instantiate_on(node)

    filter = jsapi.RatioFilter(g, numer=coral_fidxs['dl_utime'],
                               denom=coral_fidxs['nbytes'], bound=BOUND)
    g.chain([f, csvp, round, filter, congest_logger])

  return g
def test_1node_plan(self):
  dummyNode = ("host", 123)
  planner = QueryPlanner({dummyNode: dummyNode})

  qGraph = jsapi.QueryGraph()
  reader = jsapi.FileRead(qGraph, "file name")

  req = ControlMessage()
  req.type = ControlMessage.ALTER
  qGraph.add_to_PB(req.alter.add())
  err = planner.take_raw_topo(req.alter[0]).lower()
  self.assertEquals(len(err), 0)

  plan = planner.get_assignments(1)
  self.assertTrue(dummyNode in plan)
  self.assertEquals(len(plan), 1)
  self.assertEquals(len(plan[dummyNode].operators), 1)
def generate_and_run(options, all_nodes, server, k):
  root_node = find_root_node(options, all_nodes)
  source_nodes = get_source_nodes(options, all_nodes, root_node)

  g = jsapi.QueryGraph()
  start_ts = parse_ts(options.start_ts)

  central_cube = define_raw_cube(g, "global_coral_urls", root_node, overwrite=True)

  if not options.no_echo:
    pull_q = jsapi.DelayedSubscriber(g, {}, sort_order="-count", num_results=k)
    # pull_q.set_cfg("ts_field", 0)
    # pull_q.set_cfg("start_ts", start_ts)
    pull_q.set_cfg("rollup_levels", "0,0,1")
    pull_q.set_cfg("window_offset", 20 * 1000)  # but trailing by a few
    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)
    g.chain([central_cube, pull_q, echo])

  tput_merge = jsapi.MultiRoundCoord(g)
  # tput_merge.set_cfg("start_ts", start_ts)
  # tput_merge.set_cfg("window_offset", 5 * 1000)
  # tput_merge.set_cfg("ts_field", 0)
  tput_merge.set_cfg("wait_for_start", 10)
  tput_merge.set_cfg("num_results", k)
  tput_merge.set_cfg("sort_column", "-count")
  # tput_merge.set_cfg("min_window_size", 5)
  tput_merge.set_cfg("rollup_levels", "0,0,1")  # roll up time, response code and referer
  tput_merge.instantiate_on(root_node)
  g.chain([tput_merge, central_cube])

  for node in source_nodes:
    local_cube = define_raw_cube(g, "local_records", node, overwrite=False)
    # print "cube output dimensions:", local_cube.get_output_dimensions()
    pull_from_local = jsapi.MultiRoundClient(g)
    pull_from_local.instantiate_on(node)
    lastOp = g.chain([local_cube, pull_from_local, tput_merge])

  deploy_or_dummy(options, server, g)
def test_reader(self):
  g = jsapi.QueryGraph()
  k = 40
  echoer = jsapi.SendK(g, k)

  resultReader = ClientDataReader()
  g.connectExternal(echoer, resultReader.prep_to_receive_data())

  self.make_local_worker()
  # self.controller.deploy(g)
  self.validate_response(self.make_deploy_request(g))

  # validate SendK by counting
  tuplesReceived = []
  map(tuplesReceived.append, resultReader)

  self.assertEquals(len(tuplesReceived), k)
  print "client reader test succeeded"
def test_CSVParse_validate_bad(self):
  qGraph = jsapi.QueryGraph()
  reader = jsapi.FileRead(qGraph, "file name")
  csv_types = "IIIII"
  csvprs = jsapi.CSVParse(qGraph, csv_types)

  # should fail because the out-schema of the previous CSVParse has an int
  # as its first element, while CSVParse currently needs a string as the
  # first element. This will probably change when CSVParse supports parsing
  # an arbitrarily indexed tuple, but the validation will be quite similar;
  # probably:
  #   assert 'S' != csv_types[3]  # note that this is a real assert, not a test
  #   csvprs_fail = jsapi.CSVParse(qGraph, csv_types, field_to_parse=3)
  csvprs_fail = jsapi.CSVParse(qGraph, "DDSS")
  qGraph.connect(reader, csvprs)
  qGraph.connect(csvprs, csvprs_fail)

  self.assertRaises(SchemaError, qGraph.validate_schemas)
  # a hack for exceptions with types. This unittest function is new in Python
  # 2.7, so will fail in 2.6 or earlier...
  self.assertRaisesRegexp(SchemaError, '[.\s]*requires a string[.\s]*',
                          qGraph.validate_schemas)
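# A minimal fallback sketch for the Python 2.6 caveat noted above
# (hypothetical; not part of the original suite): reproduce the regexp check
# with plumbing that works on older unittest versions.
def assert_raises_regexp_26(testcase, exc_type, pattern, func):
  import re
  try:
    func()
  except exc_type as ex:
    testcase.assertTrue(re.search(pattern, str(ex)) is not None)
  else:
    testcase.fail("expected %s, but nothing was raised" % exc_type.__name__)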
def test_1node_failure(self):
  dummyNodeOutbound = ("host", 123)
  dummyNodeListening = ("host", 1235)
  c = Controller(("", 0))
  c.start_computation_async = lambda x: 0  # stub out

  # Add a node
  add_node(c, dummyNodeOutbound, dummyNodeListening)

  # add a small topology
  qGraph = jsapi.QueryGraph()
  reader = jsapi.FileRead(qGraph, "file name")
  cube = qGraph.add_cube("local_results")
  cube.add_dim("hostname", Element.STRING, 0)
  cube.add_agg("count", jsapi.Cube.AggType.COUNT, 1)
  qGraph.connect(reader, cube)

  req = ControlMessage()
  req.type = ControlMessage.ALTER
  qGraph.add_to_PB(req.alter.add())

  resp = ControlMessage()
  c.handle_alter(resp, req.alter[0])
  if resp.type != ControlMessage.OK:
    print resp.error_msg.msg
  self.assertEquals(ControlMessage.OK, resp.type)

  req.type = ControlMessage.ALTER_RESPONSE
  query_planner.overwrite_comp_ids(req.alter[0], resp.started_comp_id)
  c.handle_alter_response(req.alter[0], dummyNodeOutbound)

  # confirm topology started
  self.assertTrue('local_results' in c.cube_locations)
  # print c.cube_locations

  # drop node
  dummyNodeOutbound = self.stop_and_start(c, dummyNodeOutbound,
                                          dummyNodeListening, req)
  print "stopping a second time."
  self.stop_and_start(c, dummyNodeOutbound, dummyNodeListening, req)
def test_with_subscriber(self):
  dummyNode = ("host", 123)
  planner = QueryPlanner({dummyNode: dummyNode})

  qGraph = jsapi.QueryGraph()
  cube = qGraph.add_cube("local_results")
  cube.add_dim("hostname", Element.STRING, 0)
  cube.add_dim("time", Element.TIME, 1)
  cube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)
  cube.set_overwrite(True)  # fresh results

  subscriber = jsapi.TimeSubscriber(qGraph, {"hostname": "http://foo.com"}, 1000)
  qGraph.connect(cube, subscriber)

  err = planner.take_raw_topo(qGraph.get_deploy_pb().alter[0]).lower()
  self.assertEquals(len(err), 0)
  plan = planner.get_assignments(1)
  self.assertTrue(dummyNode in plan)
  self.assertEquals(len(plan), 1)
def test_cubeFilterSubscriber(self):
  qGraph = jsapi.QueryGraph()
  src = jsapi.RandSource(qGraph, 1, 2)

  local_cube = qGraph.add_cube("results")
  local_cube.add_dim("state", Element.STRING, 0)
  local_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)

  filter = jsapi.FilterSubscriber(qGraph, cube_field=2, level_in_field=0)
  # out-schema from filter should be S,T, matching source
  ex = jsapi.ExtendOperator(qGraph, "i", ["a count"])
  eval_op = jsapi.RandEval(qGraph)
  qGraph.chain([src, ex, local_cube, filter, eval_op])

  reader = jsapi.FileRead(qGraph, "file name")
  csv_parse = jsapi.CSVParse(qGraph, types="I", fields_to_keep="all")
  qGraph.chain([reader, csv_parse, filter])

  try:
    qGraph.validate_schemas()
  except SchemaError as ex:
    self.fail("should not throw, but got " + str(ex))
def main():
  parser = standard_option_parser()
  parser.add_option("--mode", dest="mode", action="store",
                    help="query to run. Should be one of %s" % MODE_LIST)
  parser.add_option("--wait", dest="wait", action="store",
                    help="how long to wait for results")
  (options, args) = parser.parse_args()

  if options.mode:
    mode = options.mode
    if len(args) > 0:
      print "Can't specify mode as both an arg and an option."
      sys.exit(0)
  else:
    if len(args) == 0:
      print "Must specify a mode. Should be one of %s" % MODE_LIST
      sys.exit(0)
    mode = args[0]

  if mode == "quantiles":
    define_internal_cube = quant_cube
    src_to_internal = src_to_quant
    process_results = process_quant
    final_rollup_levels = "8,1"
  elif mode == "urls":
    define_internal_cube = url_cube
    src_to_internal = src_to_url
    process_results = lambda x, y, z: y
    final_rollup_levels = "8,1,1"  # roll up time slightly; rest is unrolled
  elif mode == "domains":
    define_internal_cube = url_cube
    src_to_internal = src_to_domain
    process_results = lambda x, y, z: y
    final_rollup_levels = "8,1,1"  # roll up time slightly; rest is unrolled
  elif mode == "domains_all":
    define_internal_cube = dom_notime
    src_to_internal = drop_time_from_doms
    process_results = lambda x, y, z: y
    final_rollup_levels = "1,1"
  elif mode == "slow_reqs":
    define_internal_cube = url_cube
    src_to_internal = src_slow_reqs
    process_results = lambda x, y, z: y
    final_rollup_levels = "9,1,1"  # nothing rolled up
  elif mode == "bad_domains":
    define_internal_cube = bad_doms_cube
    src_to_internal = src_to_bad_doms
    process_results = bad_doms_postprocess
    final_rollup_levels = "8,1,1"  # roll up time slightly; rest is unrolled
  elif mode == "total_bw":
    define_internal_cube = bw_cube
    src_to_internal = src_to_bw
    process_results = lambda x, y, z: y
    final_rollup_levels = "8,1"  # roll up time slightly; rest is unrolled
  elif mode == "bad_referers":
    define_internal_cube = badreferrer_cube
    src_to_internal = src_to_badreferrer
    process_results = badreferrer_out
    final_rollup_levels = "8,1"  # roll up time slightly; rest is unrolled
  else:
    print "Unknown mode %s" % mode
    sys.exit(0)

  all_nodes, server = get_all_nodes(options)
  if len(all_nodes) < 1:
    print "FATAL: no nodes"
    sys.exit(0)

  g = jsapi.QueryGraph()
  ops = []
  union_node = find_root_node(options, all_nodes)

  for node in all_nodes:
    if node == union_node and not options.generate_at_union:
      continue
    raw_cube = define_raw_cube(g, "local_records", node, overwrite=False)
    raw_cube_sub = jsapi.TimeSubscriber(g, {}, 1000)
    raw_cube_sub.set_cfg("simulation_rate", options.warp_factor)
    raw_cube_sub.set_cfg("ts_field", 0)
    if options.start_ts:
      raw_cube_sub.set_cfg("start_ts", options.start_ts)
    # time_shift = jsapi.TimeWarp(g, field=0, warp=options.warp_factor)

    last_op = g.chain([raw_cube, raw_cube_sub])  # , time_shift])
    last_op = src_to_internal(g, last_op, node, options)
    last_op.instantiate_on(node)
    ops.append(last_op)

  if len(ops) == 0:
    print "can't run, no [non-union] nodes"
    sys.exit(0)

  union_cube = define_internal_cube(g, "union_cube", union_node)

  g.agg_tree(ops, union_cube, start_ts=options.start_ts,
             sim_rate=options.warp_factor)

  if options.bw_cap:
    union_cube.set_inlink_bwcap(float(options.bw_cap))

  # This is the final output subscriber
  pull_q = jsapi.TimeSubscriber(g, {}, 1000)  # only for UI purposes
  pull_q.set_cfg("ts_field", 0)
  # pull_q.set_cfg("latency_ts_field", 7)
  if options.start_ts:
    pull_q.set_cfg("start_ts", options.start_ts)
  pull_q.set_cfg("rollup_levels", final_rollup_levels)
  pull_q.set_cfg("simulation_rate", options.warp_factor)
  pull_q.set_cfg("window_offset", 8 * 1000)  # ...trailing by a few

  g.connect(union_cube, pull_q)
  last_op = process_results(g, pull_q, options)

  echo = jsapi.Echo(g)
  echo.instantiate_on(union_node)
  g.connect(last_op, echo)

  deploy_or_dummy(options, server, g)
def get_graph(source_nodes, root_node, options):
  g = jsapi.QueryGraph()
  start_ts = parse_ts(options.start_ts)

  central_cube = g.add_cube("global_coral_anamolous_quant")
  central_cube.instantiate_on(root_node)
  define_quant_cube(central_cube)

  pull_q = jsapi.TimeSubscriber(g, {}, 1000)
  pull_q.set_cfg("ts_field", 0)
  pull_q.set_cfg("start_ts", start_ts)
  # pull_q.set_cfg("rollup_levels", "8,1")
  # pull_q.set_cfg("simulation_rate", 1)
  pull_q.set_cfg("window_offset", 6 * 1000)  # but trailing by a few

  q_op = jsapi.Quantile(g, 0.95, field=1)
  g.chain([central_cube, pull_q, q_op])

  thresh_cube = g.add_cube("global_coral_anamalous_thresh")
  thresh_cube.add_dim("time", CubeSchema.Dimension.TIME_CONTAINMENT, 0)
  thresh_cube.add_agg("thresh", jsapi.Cube.AggType.COUNT, 1)
  thresh_cube.set_overwrite(True)
  thresh_cube.instantiate_on(root_node)

  if not options.no_echo:
    echo = jsapi.Echo(g)
    echo.instantiate_on(root_node)
    g.chain([q_op, echo, thresh_cube])
  else:
    g.chain([q_op, thresh_cube])

  parsed_field_offsets = [coral_fidxs['timestamp'], coral_fidxs['HTTP_stat'],
                          coral_fidxs['URL_requested'], coral_fidxs['nbytes'],
                          coral_fidxs['dl_utime'], len(coral_types)]

  global_results = g.add_cube("global_anomalous")
  define_schema_for_raw_cube(global_results, parsed_field_offsets)
  global_results.instantiate_on(root_node)

  FILTER_FIELD = coral_fidxs['nbytes']
  for node in source_nodes:
    ################ First do the data loading part
    f = jsapi.FileRead(g, options.fname, skip_empty=True)
    csvp = jsapi.CSVParse(g, coral_types)
    csvp.set_cfg("discard_off_size", "true")
    round = jsapi.TimeWarp(g, field=1, warp=options.warp_factor)
    round.set_cfg("wait_for_catch_up", "true")
    f.instantiate_on(node)

    local_raw_cube = g.add_cube("local_coral_anamolous_all")
    define_schema_for_raw_cube(local_raw_cube, parsed_field_offsets)

    pass_raw = jsapi.FilterSubscriber(g)  # to pass through to the summary and q-cube
    to_summary = jsapi.ToSummary(g, field=FILTER_FIELD, size=100)

    local_q_cube = g.add_cube("local_coral_anamolous_quant")
    define_quant_cube(local_q_cube, [coral_fidxs['timestamp'], FILTER_FIELD])

    g.chain([f, csvp, round, local_raw_cube, pass_raw, to_summary, local_q_cube])

    pull_from_local = jsapi.TimeSubscriber(g, {}, 1000)
    pull_from_local.set_cfg("simulation_rate", 1)
    pull_from_local.set_cfg("ts_field", 0)
    pull_from_local.set_cfg("start_ts", start_ts)
    pull_from_local.set_cfg("window_offset", 2000)  # but trailing by a few

    local_q_cube.instantiate_on(node)
    pull_from_local.instantiate_on(node)
    g.chain([local_q_cube, pull_from_local, central_cube])

    ################ Now do the second phase
    passthrough = jsapi.FilterSubscriber(g)
    passthrough.instantiate_on(root_node)

    filter = jsapi.FilterSubscriber(g, cube_field=FILTER_FIELD, level_in_field=1)
    filter.instantiate_on(node)
    g.chain([thresh_cube, passthrough, filter])
    g.chain([local_raw_cube, filter, global_results])

  return g