예제 #1
0
    def test_2node(self):
        """Build an aggregation tree over two nodes and feed it to the planner.

        Two file readers, one pinned to each node, are joined to a "union" cube
        through agg_tree(); the resulting graph must contain six edges and the
        planner must accept the topology with an empty error string.
        """
        first_node = ("host", 123)
        second_node = ("host2", 123)
        worker_nodes = [first_node, second_node]
        planner = QueryPlanner(dict(zip(worker_nodes, worker_nodes)))

        graph = jsapi.QueryGraph()

        union_cube = graph.add_cube("union")
        union_cube.add_dim("state", Element.STRING, 0)
        union_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 1)

        # One reader per node, each placed explicitly on its node.
        readers = []
        for address, portno in worker_nodes:
            file_reader = jsapi.FileRead(graph, "file name")
            readers.append(file_reader)
            node_id = NodeID()
            node_id.address = address
            node_id.portno = portno
            file_reader.instantiate_on(node_id)

        # No direct reader -> cube edge is added: this test exercises agg_tree.
        graph.agg_tree(readers, union_cube)
        # 2x (op --> cube --> subscriber --> union)
        edge_count = len(graph.edges)
        self.assertEquals(6, edge_count)

        request = ControlMessage()
        request.type = ControlMessage.ALTER
        graph.add_to_PB(request.alter.add())

        planner_error = planner.take_raw_topo(request.alter[0]).lower()
        self.assertEquals(len(planner_error), 0)
예제 #2
0
def parse_setup():
  """Build and deploy the requested-domain counting query.

  The controller address and the input file come from
  js_client_config.arg_config(). The pipeline reads the file, parses it as
  CSV, extracts the domain from the requested-URL field, counts domains in a
  local cube and subscribes to the cube sorted by "-count".

  Returns:
    The ClientDataReader registered as the external receiver of the
    subscriber's output.
  """
  controller, input_path = js_client_config.arg_config()
  controller_host, controller_port = controller

  pull_limit = 20  # how many to pull to top level
  display_limit = 10  # how many to display; not referenced in this function

  # Index of the field this computation aggregates on.
  url_field = coral_fidxs['URL_requested']

  graph = jsapi.QueryGraph()
  file_reader = jsapi.FileRead(graph, input_path, skip_empty=True)
  csv_parser = jsapi.CSVParse(graph, coral_types)
  domain_parser = jsapi.GenericParse(
      graph,
      DOMAIN_CAPTURE,
      coral_types[url_field],
      field_to_parse=url_field,
      keep_unparsed=False)
  top_subscriber = jsapi.TimeSubscriber(graph, {}, 2000, "-count", pull_limit)

  domain_cube = graph.add_cube("coral_results")
  domain_cube.add_dim("Requested_domains", Element.STRING, 0)
  # An index past the end of the tuple is a magic API to the "count"
  # aggregate that tells it to assume a count of 1.
  domain_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 1)
  domain_cube.set_overwrite(True)  # fresh results

  pipeline = [file_reader, csv_parser, domain_parser, domain_cube,
              top_subscriber]
  graph.chain(pipeline)

  client_reader = ClientDataReader(raw_data=True)
  receive_addr = client_reader.prep_to_receive_data()
  graph.connectExternal(top_subscriber, receive_addr)
  remote_deploy(controller_host, controller_port, graph, cube=domain_cube)

  return client_reader
예제 #3
0
    def test_2op_plan(self):
        """This test creates an operator and a cube, attached.

        The planner must accept the topology without error and assign one
        operator, one cube and one edge to the only available node.
        """
        only_node = ("host", 123)
        planner = QueryPlanner({only_node: only_node})

        graph = jsapi.QueryGraph()
        file_reader = jsapi.FileRead(graph, "file name")
        results_cube = graph.add_cube("local_results")
        results_cube.add_dim("hostname", Element.STRING, 0)
        results_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 1)
        results_cube.set_overwrite(True)  # fresh results

        graph.connect(file_reader, results_cube)

        request = ControlMessage()
        request.type = ControlMessage.ALTER
        graph.add_to_PB(request.alter.add())
        topology = request.alter[0]

        planner_error = planner.take_raw_topo(topology).lower()
        if planner_error:
            # Surface the planner's message before the assertion fails.
            print("Test yielded unexpected error:" + " " + planner_error)
        self.assertEquals(len(planner_error), 0)

        assignments = planner.get_assignments(1)
        self.assertTrue(only_node in assignments)
        self.assertEquals(len(assignments), 1)
        node_plan = assignments[only_node]
        self.assertEquals(len(node_plan.operators), 1)
        self.assertEquals(len(node_plan.cubes), 1)

        node_request = node_plan.get_pb()
        self.assertEquals(len(node_request.alter[0].edges), 1)
예제 #4
0
    def test_cubeSubscribe(self):
        """Check schema validation for subscribers attached to a cube.

        A subscriber feeding a RandEval must validate cleanly. After a second
        subscriber feeding a TRoundOperator on field 0 is added, validation
        must raise a SchemaError mentioning that field 0 has to be a time.
        """
        graph = jsapi.QueryGraph()
        results_cube = graph.add_cube("results")
        results_cube.add_dim("state", Element.STRING, 0)
        results_cube.add_dim("time", Element.TIME, 1)
        results_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)

        # pull every second
        first_sub = jsapi.TimeSubscriber(graph, {}, 1000, "-count")
        evaluator = jsapi.RandEval(graph)

        graph.connect(results_cube, first_sub)
        graph.connect(first_sub, evaluator)

        try:
            graph.validate_schemas()
        except SchemaError as schema_err:
            self.fail("should not throw, but got " + str(schema_err))

        # pull every second
        second_sub = jsapi.TimeSubscriber(graph, {}, 1000, "-count")
        rounder = jsapi.TRoundOperator(graph, 0, 2)
        graph.connect(second_sub, rounder)
        graph.connect(results_cube, second_sub)

        try:
            graph.validate_schemas()
        except SchemaError as schema_err:
            message = str(schema_err)
            self.assertTrue("requires that field 0 be a time" in message)
            print("got expected err:" + " " + message)
        else:
            self.fail("should throw, but didn't")
예제 #5
0
    def test_cubeInsert(self):
        """Check schema validation for edges that insert into a cube.

        A RandSource feeding the cube must validate. After that source is
        removed and a FileRead is connected instead, the graph has a single
        forward edge and validation must raise a SchemaError.
        """
        graph = jsapi.QueryGraph()
        results_cube = graph.add_cube("results")
        results_cube.add_dim("state", Element.STRING, 0)
        results_cube.add_dim("time", Element.TIME, 1)
        results_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)

        rand_source = jsapi.RandSource(graph, 1, 2)
        graph.connect(rand_source, results_cube)

        try:
            graph.validate_schemas()
        except SchemaError as schema_err:
            self.fail("should not throw, but got " + str(schema_err))

        graph.remove(rand_source)

        # add a mismatched edge, string versus string,time
        file_reader = jsapi.FileRead(graph, "file name")
        graph.connect(file_reader, results_cube)

        edge_map = graph.forward_edge_map()
        self.assertEquals(len(edge_map), 1)

        try:
            graph.validate_schemas()
        except SchemaError as schema_err:
            print("got expected err:" + " " + str(schema_err))
        else:
            self.fail("should throw, but didn't")
예제 #6
0
    def test_serializePolicy(self):
        """Check that a congestion policy is serialized into the deploy message.

        A policy over [subscriber, sampler] is added to the graph; the deploy
        protobuf must then carry exactly one congestion policy whose first
        operator is the subscriber.
        """
        graph = jsapi.QueryGraph()
        results_cube = graph.add_cube("results")
        results_cube.add_dim("state", Element.STRING, 0)
        results_cube.add_dim("time", Element.TIME, 1)
        results_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)

        rand_source = jsapi.RandSource(graph, 1, 2)
        # pull every second
        subscriber = jsapi.TimeSubscriber(graph, {}, 1000, "-count")
        sampler = jsapi.VariableSampling(graph)

        evaluator = jsapi.RandEval(graph)
        pipeline = [rand_source, results_cube, subscriber, sampler, evaluator]
        graph.chain(pipeline)
        graph.add_policy([subscriber, sampler])

        try:
            deploy_pb = graph.get_deploy_pb()
            policies = deploy_pb.alter[0].congest_policies
            self.assertEquals(len(policies), 1)
            first_task = policies[0].op[0].task
            self.assertEquals(first_task, subscriber.id)
        except SchemaError as schema_err:
            self.fail("should not throw, but got " + str(schema_err))
예제 #7
0
    def test_external_edge_plan(self):
        """Check that an edge to an external address survives planning.

        An edge from the graph's only operator to "myhost":1000 is added by
        hand to the ALTER message; the per-node plan must keep that edge and
        its destination port.
        """
        graph = jsapi.QueryGraph()
        reader = jsapi.FileRead(graph, "file name")
        request = ControlMessage()
        request.type = ControlMessage.ALTER
        graph.add_to_PB(request.alter.add())
        topology = request.alter[0]

        MY_PORTNO = 1000
        external_edge = topology.edges.add()
        external_edge.src = topology.toStart[0].id.task
        external_edge.computation = 0
        external_edge.dest_addr.address = "myhost"
        external_edge.dest_addr.portno = MY_PORTNO

        only_node = ("host", 123)
        planner = QueryPlanner({only_node: only_node})
        planner_error = planner.take_raw_topo(topology).lower()
        self.assertEquals(len(planner_error), 0)
        assignments = planner.get_assignments(1)

        self.assertTrue(only_node in assignments)
        self.assertEquals(len(assignments), 1)
        node_plan = assignments[only_node]
        self.assertEquals(len(node_plan.operators), 1)

        planned_edges = node_plan.get_pb().alter[0].edges
        self.assertEquals(len(planned_edges), 1)
        self.assertEquals(planned_edges[0].dest_addr.portno, MY_PORTNO)
예제 #8
0
def main():
    """Deploy video sources feeding a single ImageQuality collector.

    Every selected node gets a VideoSource chained through a
    TimestampOperator into the collector, which is placed on the root node.
    The root node itself is skipped unless options.generate_at_union is set.
    """
    parser = standard_option_parser()
    options, _ = parser.parse_args()

    all_nodes, server = get_all_nodes(options)
    root_node = find_root_node(options, all_nodes)

    node_count = len(all_nodes)
    print("%d worker nodes in system" % node_count)
    graph = jsapi.QueryGraph()
    collector = jsapi.ImageQuality(graph)
    collector.instantiate_on(root_node)

    # NOTE(review): the loop below skips the root node only when
    # generate_at_union is false, yet this check rejects the single-node case
    # when generate_at_union is true -- confirm the condition is not inverted.
    # The process also exits with status 0 on this failure path.
    too_few_nodes = node_count < 1 or (node_count == 1
                                       and options.generate_at_union)
    if too_few_nodes:
        print("FAIL: not enough nodes")
        sys.exit(0)

    for node in all_nodes:
        is_excluded_root = (node == root_node
                            and not options.generate_at_union)
        if is_excluded_root:
            continue
        video_source = jsapi.VideoSource(graph,
                                         "/tmp/jetstream/mot.profile.csv",
                                         "/tmp/jetstream/mot.source.csv",
                                         1500)
        timestamp = jsapi.TimestampOperator(graph, "ms")
        video_source.instantiate_on(node)
        graph.chain([video_source, timestamp, collector])

    print("deploying")
    deploy_or_dummy(options, server, graph)
예제 #9
0
def get_graph(node, options):
    """Build the graph that loads one node's log file into a local raw cube.

    Args:
        node: node on which the file reader (and the cube, via
            define_raw_cube) is placed.
        options: parsed options; reads fname, warp_factor, cube_name and
            full_url.

    Returns:
        The populated jsapi.QueryGraph.
    """
    graph = jsapi.QueryGraph()

    # Tuple offsets handed to the cube; the final entry is len(coral_types),
    # i.e. one past the last parsed field.
    field_names = ['timestamp', 'HTTP_stat', 'URL_requested', 'nbytes',
                   'dl_utime']
    parsed_field_offsets = [coral_fidxs[name] for name in field_names]
    parsed_field_offsets.append(len(coral_types))

    file_reader = jsapi.FileRead(graph, options.fname, skip_empty=True)
    csv_parser = jsapi.CSVParse(graph, coral_types)
    csv_parser.set_cfg("discard_off_size", "true")
    time_warp = jsapi.TimeWarp(graph, field=1, warp=options.warp_factor)
    time_warp.set_cfg("wait_for_catch_up", "false")
    file_reader.instantiate_on(node)

    local_raw_cube = define_raw_cube(graph, options.cube_name, node,
                                     parsed_field_offsets, True)

    pipeline = [file_reader, csv_parser, time_warp]
    if not options.full_url:
        # Reduce the requested URL to its domain before it reaches the cube.
        pipeline.append(
            jsapi.URLToDomain(graph, field=coral_fidxs['URL_requested']))
    pipeline.append(local_raw_cube)
    graph.chain(pipeline)
    return graph
예제 #10
0
 def test_CSVParse_validate(self):
     """A FileRead connected to a CSVParse must pass schema validation."""
     graph = jsapi.QueryGraph()
     file_reader = jsapi.FileRead(graph, "file name")
     csv_parser = jsapi.CSVParse(graph, "ISDDDIIDSISIISD")
     graph.connect(file_reader, csv_parser)
     try:
         graph.validate_schemas()
     except SchemaError as schema_err:
         self.fail("Should not throw, but got: " + str(schema_err))
예제 #11
0
def main():
  """Deploy image readers with an optional sampling stage before the collector.

  Each selected node gets a BlobReader whose output passes through the
  sampling operator chosen by --degradation (none for NONE), then a
  TimestampOperator, into one ImageQuality collector on the root node.
  """
  parser = standard_option_parser()
  parser.add_option("--rate", dest="img_per_sec", default="2",
                    help="number of images to send per second")
  parser.add_option("--dir", dest="dirname", default="sample_images",
                    help="where to read from")
  parser.add_option("--prefix", dest="prefix", default="l",
                    help="prefix for images.")
  parser.add_option("--degradation", dest="deg", default="interval",
                    help="which degradation to use; can be hash, interval")

  options, _ = parser.parse_args()

  all_nodes, server = get_all_nodes(options)
  root_node = find_root_node(options, all_nodes)

  node_count = len(all_nodes)
  print("%d worker nodes in system" % node_count)
  graph = jsapi.QueryGraph()
  files_per_window = float(options.img_per_sec) * window_len_sec
  collector = jsapi.ImageQuality(graph)
  collector.instantiate_on(root_node)

  # NOTE(review): the loop below skips the root node only when
  # generate_at_union is false, yet this check rejects the single-node case
  # when generate_at_union is true -- confirm the condition is not inverted.
  # The process also exits with status 0 on both failure paths below.
  if node_count < 1 or (node_count == 1 and options.generate_at_union):
    print("FAIL: not enough nodes")
    sys.exit(0)

  degradation = options.deg
  if degradation == INTERVAL:
    print("Using interval sampling (Coarse-grained)")
  elif degradation == HASH:
    print("Using hash-sampling. (Fine-grained)")
  elif degradation == NONE:
    print("No degradation")
  else:
    print("unknown degradation %s. Aborting" % degradation)
    sys.exit(0)

  for node in all_nodes:
    if node == root_node and not options.generate_at_union:
      continue
    reader = jsapi.BlobReader(
        graph,
        dirname=options.dirname,
        prefix=options.prefix,
        files_per_window=files_per_window,
        ms_per_window=1000 * window_len_sec)
    if degradation == INTERVAL:
      sampler = jsapi.IntervalSampling(graph, max_interval=4)
    elif degradation == HASH:
      sampler = jsapi.VariableSampling(graph, field=0, type='I')
      sampler.set_cfg("steps", "20")

    timestamp = jsapi.TimestampOperator(graph, "ms")
    reader.instantiate_on(node)

    # The sampling stage is only part of the chain when degradation is on.
    if degradation == NONE:
      pipeline = [reader, timestamp, collector]
    else:
      pipeline = [reader, sampler, timestamp, collector]
    graph.chain(pipeline)

  print("deploying")
  deploy_or_dummy(options, server, graph)
예제 #12
0
def main():
    """Count URLs matched from a log file and echo the top entries.

    The first positional argument is the file to read; --controller gives the
    controller address (default "localhost:3456"). The results cube is
    instantiated on every node the controller reports.
    """
    parser = OptionParser()
    parser.add_option("-C", "--config", dest="config_file",
                      help="read config from FILE", metavar="FILE")
    parser.add_option("-a", "--controller", dest="controller",
                      help="controller address", default="localhost:3456")
    options, args = parser.parse_args()

    controller_addr = normalize_controller_addr(options.controller)
    serv_addr, serv_port = controller_addr

    log_path = args[0]

    top_level_limit = 20  # how many to pull to top level
    display_limit = 10  # how many to display; not referenced below

    # Define the graph abstractly, without a computation
    graph = jsapi.QueryGraph()
    file_reader = jsapi.FileRead(graph, log_path)
    url_parser = jsapi.GenericParse(graph, ".*GET ([^ ]*) .*", "s")

    url_cube = graph.add_cube("local_results")
    url_cube.add_dim("url", Element.STRING, 0)
    url_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 1)
    url_cube.set_overwrite(True)  # fresh results

    top_subscriber = jsapi.TimeSubscriber(graph, {}, 2000, "-count",
                                          top_level_limit)
    echo = jsapi.Echo(graph)

    # reader -> parser -> cube -> subscriber -> echo, one edge at a time
    stages = [file_reader, url_parser, url_cube, top_subscriber, echo]
    for upstream, downstream in zip(stages, stages[1:]):
        graph.connect(upstream, downstream)

    # Finished building in memory, now to join
    server = RemoteController((serv_addr, serv_port))

    a_node = server.get_a_node()
    assert isinstance(a_node, NodeID)
    all_nodes = server.all_nodes()

    url_cube.instantiate_on(all_nodes)

    server.deploy(graph)
예제 #13
0
    def test_file_and_counter(self):
        """A FileRead connected to a RateRecord must pass schema validation."""
        graph = jsapi.QueryGraph()
        file_reader = jsapi.FileRead(graph, "file name")
        rate_counter = jsapi.RateRecord(graph)
        graph.connect(file_reader, rate_counter)

        try:
            graph.validate_schemas()
        except SchemaError as schema_err:
            self.fail("Should not throw, but got: " + str(schema_err))
예제 #14
0
 def test_randEval(self):
     """RandSource -> ExtendOperator -> RandEval must pass schema validation."""
     graph = jsapi.QueryGraph()
     rand_source = jsapi.RandSource(graph, 1, 2)
     extender = jsapi.ExtendOperator(graph, "i", ["a count"])
     evaluator = jsapi.RandEval(graph)
     graph.connect(rand_source, extender)
     graph.connect(extender, evaluator)
     try:
         graph.validate_schemas()
     except SchemaError as schema_err:
         self.fail("Should not throw, but got: " + str(schema_err))
예제 #15
0
def main():
  """Grep a file for a pattern and print the matching tuples as they arrive.

  Positional arguments: PATTERN FILE. The pattern is wrapped in ".*" on both
  sides before being handed to StringGrep. --controller gives the controller
  as "host:port" or just "host" (port 3456 is then used).

  The results cube is placed on one node returned by the controller, the
  hostname-extending operator on all nodes, and the cube's output is sent to
  a local ClientDataReader whose tuples are printed one per line.
  """
  parser = OptionParser()
  parser.add_option("-C", "--config", dest="config_file",
                  help="read config from FILE", metavar="FILE")

  parser.add_option("-a", "--controller", dest="controller",
                  help="controller address", default="localhost:3456")
  (options, args) = parser.parse_args()
  if len(args) < 2:
    # Fail with a usage message instead of an IndexError further down.
    parser.error("expected two arguments: PATTERN FILE")
  pattern = ".*" + args[0] + ".*"
  file_to_grep = args[1]

  if ':' in options.controller:
    (serv_addr, serv_port) = options.controller.split(':')
    serv_port = int(serv_port)
  else:
    serv_addr = options.controller
    serv_port = 3456

  ### Define the graph abstractly, without a computation
  g = jsapi.QueryGraph()
  reader = jsapi.FileRead(g, file_to_grep)
  grepper = jsapi.StringGrep(g, pattern)
  host_extend = jsapi.ExtendOperator(g, "s", ["${HOSTNAME}"])

  cube = g.add_cube("local_results")
  cube.add_dim("log_line", Element.STRING, 0)
  cube.add_dim("hostname", Element.STRING, 1)
  cube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)

  cube.set_overwrite(True)  #fresh results

  g.connect(reader, grepper)
  g.connect(grepper, host_extend)
  g.connect(host_extend, cube)

  #### Finished building in memory, now to join
  server = RemoteController()
  server.connect(serv_addr, serv_port)
  n = server.get_a_node()
  assert isinstance(n, NodeID)
  nodes = server.all_nodes()

  cube.instantiate_on(n)
  host_extend.instantiate_on(nodes)

  result_reader = ClientDataReader()
  net_addr = result_reader.prep_to_receive_data()
  g.connect(cube, net_addr)

  def print_tuple(x):
    # A named function rather than a lambda: while `print` is a statement it
    # cannot appear in a lambda body, so `lambda x: print x` did not compile.
    # The single-argument parenthesised form prints the same text whether
    # `print` is a statement or a function.
    print(x)

  server.deploy(g)
  result_reader.blocking_read(print_tuple)
예제 #16
0
 def test_cubeInsertPartial(self):
     """Inserting tuples with more fields than the cube must validate."""
     print("inserting tuple with more data than cube")
     graph = jsapi.QueryGraph()
     # The cube declares a single dimension; the source is connected to it
     # directly.
     state_cube = graph.add_cube("results")
     state_cube.add_dim("state", Element.STRING, 0)
     rand_source = jsapi.RandSource(graph, 1, 2)
     graph.connect(rand_source, state_cube)
     try:
         graph.validate_schemas()
     except SchemaError as schema_err:
         self.fail("should not throw, but got " + str(schema_err))
예제 #17
0
    def test_bad_edge(self):
        """A FileRead feeding a TRoundOperator on field 2 must fail validation."""
        graph = jsapi.QueryGraph()
        file_reader = jsapi.FileRead(graph, "file name")
        rounder = jsapi.TRoundOperator(graph, 2, 2)
        graph.connect(file_reader, rounder)

        try:
            graph.validate_schemas()
        except SchemaError as schema_err:
            message = str(schema_err)
            self.assertTrue("can't round field 2" in message)
        else:
            self.fail("should throw, but didn't")
예제 #18
0
def main():
    """Deploy a multi-round query from per-node cubes into a central cube.

    A MultiRoundCoord on the root node feeds the central cube; every source
    node contributes a local cube read through a MultiRoundClient. Unless
    options.no_echo is set, a TimeSubscriber on the central cube is chained
    into an Echo on the root node.
    """
    parser = standard_option_parser()
    options, _ = parser.parse_args()
    all_nodes, server = get_all_nodes(options)

    root_node = find_root_node(options, all_nodes)
    source_nodes = get_source_nodes(options, all_nodes, root_node)
    graph = jsapi.QueryGraph()

    start_ts = parse_ts(options.start_ts)
    central_cube = define_raw_cube(graph, "global_coral_urls", root_node,
                                   overwrite=True)

    if not options.no_echo:
        pull_q = jsapi.TimeSubscriber(graph, {}, 30000,
                                      sort_order="-count",
                                      num_results=10)
        subscriber_settings = [
            ("ts_field", 0),
            ("start_ts", start_ts),
            ("rollup_levels", "6,0,1"),  # roll up counts
            ("simulation_rate", 1),
            ("window_offset", 6 * 1000),  # but trailing by a few
        ]
        for key, value in subscriber_settings:
            pull_q.set_cfg(key, value)
        echo = jsapi.Echo(graph)
        echo.instantiate_on(root_node)
        graph.chain([central_cube, pull_q, echo])

    tput_merge = jsapi.MultiRoundCoord(graph)
    merge_settings = [
        ("start_ts", start_ts),
        ("window_offset", 5 * 1000),
        ("ts_field", 0),
        ("num_results", 10),
        ("sort_column", "-count"),
        ("min_window_size", 5),
        ("rollup_levels", "10,0,1"),  # roll up response code and referer
    ]
    for key, value in merge_settings:
        tput_merge.set_cfg(key, value)
    tput_merge.instantiate_on(root_node)
    graph.chain([tput_merge, central_cube])

    for node in source_nodes:
        local_cube = define_raw_cube(graph, "local_records", node,
                                     overwrite=False)
        pull_from_local = jsapi.MultiRoundClient(graph)
        pull_from_local.instantiate_on(node)
        graph.chain([local_cube, pull_from_local, tput_merge])

    deploy_or_dummy(options, server, graph)
예제 #19
0
    def test_with_partial_placement(self):
        """Plan a graph in which only the sources are pinned to nodes.

        Two identical pipelines (source -> cube -> subscriber -> extend ->
        round) converge on one shared RandEval. Only each source is placed
        explicitly; the test then inspects what the planner assigns to the
        first node.
        """
        first_node = ("host", 123)
        second_node = ("host2", 234)

        planner = QueryPlanner({
            first_node: first_node,
            second_node: second_node,
        })
        graph = jsapi.QueryGraph()

        evaluator = jsapi.RandEval(graph)

        for index, node in enumerate([first_node, second_node]):
            rand_source = jsapi.RandSource(graph, 1, 2)
            rand_source.set_cfg("rate", 1000)

            local_cube = graph.add_cube("local_results_%d" % index)
            local_cube.add_dim("state", Element.STRING, 0)
            local_cube.add_dim("time", Element.TIME, 1)
            local_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)

            subscriber = jsapi.TimeSubscriber(graph, {}, 1000)
            subscriber.set_cfg("ts_field", 1)
            subscriber.set_cfg("window_offset", 1000)

            extender = jsapi.ExtendOperator(graph, "s", ["node" + str(index)])
            rounder = jsapi.TRoundOperator(graph, fld=1, round_to=5)

            # Wire this node's pipeline into the shared evaluator.
            stages = [rand_source, local_cube, subscriber, extender, rounder,
                      evaluator]
            for upstream, downstream in zip(stages, stages[1:]):
                graph.connect(upstream, downstream)

            node_id = NodeID()
            node_id.address, node_id.portno = node
            rand_source.instantiate_on(node_id)

        graph.validate_schemas()

        planner_error = planner.take_raw_topo(graph.get_deploy_pb().alter[0])
        self.assertEquals(len(planner_error), 0)
        assignments = planner.get_assignments(1)

        first_pb = assignments[first_node].get_pb().alter[0]

        subscribers = [
            op for op in first_pb.toStart if "Subscriber" in op.op_typename
        ]
        self.assertEquals(len(subscribers), len(first_pb.toCreate))
        self.assertEquals(len(first_pb.toCreate), 1)
        started_count = len(first_pb.toStart)
        self.assertGreater(started_count, 3)
        self.assertLessEqual(started_count, 4)
예제 #20
0
    def test_bad_unify(self):
        """Two differently-typed inputs into one operator must fail validation."""
        graph = jsapi.QueryGraph()
        rand_source = jsapi.RandSource(graph, 1, 2)
        file_reader = jsapi.FileRead(graph, "file name")
        extender = jsapi.ExtendOperator(graph, "s", ["a string"])
        graph.connect(file_reader, extender)
        graph.connect(rand_source, extender)

        try:
            graph.validate_schemas()
        except SchemaError as schema_err:
            message = str(schema_err)
            self.assertTrue("match existing schema" in message)
        else:
            self.fail("should throw, but didn't")
예제 #21
0
def get_graph(source_nodes, root_node, options):
    """Build the graph that collects ratio-filtered requests at the root.

    Each source node reads and parses the log file, passes it through a
    TimeWarp and a RatioFilter (dl_utime over nbytes, bound 100), and sends
    the output to an AvgCongestLogger on the root node, which feeds the
    "global_slow" cube. Unless options.no_echo is set, a subscriber on that
    cube is chained into an Echo on the root node.

    Returns:
        The populated jsapi.QueryGraph.
    """
    echo_results = not options.no_echo
    graph = jsapi.QueryGraph()
    ratio_bound = 100

    start_ts = parse_ts(options.start_ts)

    # Tuple offsets for the cube schema; the final entry is len(coral_fidxs).
    field_names = ['timestamp', 'HTTP_stat', 'URL_requested', 'nbytes',
                   'dl_utime']
    parsed_field_offsets = [coral_fidxs[name] for name in field_names]
    parsed_field_offsets.append(len(coral_fidxs))

    global_results = graph.add_cube("global_slow")
    define_schema_for_raw_cube(global_results, parsed_field_offsets)
    global_results.instantiate_on(root_node)

    congest_logger = jsapi.AvgCongestLogger(graph)
    congest_logger.instantiate_on(root_node)

    graph.connect(congest_logger, global_results)

    if echo_results:
        pull_q = jsapi.TimeSubscriber(graph, {}, 1000)
        pull_q.set_cfg("ts_field", 0)
        pull_q.set_cfg("start_ts", start_ts)
        pull_q.set_cfg("window_offset", 6 * 1000)  # but trailing by a few

        echo = jsapi.Echo(graph)
        echo.instantiate_on(root_node)
        graph.chain([global_results, pull_q, echo])

    # The second element yielded by numbered() is not used here.
    for node, _index in numbered(source_nodes, False):
        file_reader = jsapi.FileRead(graph, options.fname, skip_empty=True)
        csv_parser = jsapi.CSVParse(graph, coral_types)
        csv_parser.set_cfg("discard_off_size", "true")
        time_warp = jsapi.TimeWarp(graph, field=1, warp=options.warp_factor)
        time_warp.set_cfg("wait_for_catch_up", "true")
        file_reader.instantiate_on(node)

        ratio_filter = jsapi.RatioFilter(graph,
                                         numer=coral_fidxs['dl_utime'],
                                         denom=coral_fidxs['nbytes'],
                                         bound=ratio_bound)
        pipeline = [file_reader, csv_parser, time_warp, ratio_filter,
                    congest_logger]
        graph.chain(pipeline)

    return graph
예제 #22
0
    def test_1node_plan(self):
        """A single operator must be planned onto the single available node."""
        only_node = ("host", 123)
        planner = QueryPlanner({only_node: only_node})

        graph = jsapi.QueryGraph()
        reader = jsapi.FileRead(graph, "file name")
        request = ControlMessage()
        request.type = ControlMessage.ALTER
        graph.add_to_PB(request.alter.add())

        planner_error = planner.take_raw_topo(request.alter[0]).lower()
        self.assertEquals(len(planner_error), 0)

        assignments = planner.get_assignments(1)
        self.assertTrue(only_node in assignments)
        self.assertEquals(len(assignments), 1)
        node_plan = assignments[only_node]
        self.assertEquals(len(node_plan.operators), 1)
def generate_and_run(options, all_nodes, server, k):
    """Build and deploy a multi-round query that requests k results.

    Args:
        options: parsed options; reads start_ts and no_echo and is passed on
            to the node-selection and deploy helpers.
        all_nodes: nodes from which the root and source nodes are chosen.
        server: handle passed to deploy_or_dummy.
        k: value used for num_results on both the subscriber and the
            MultiRoundCoord.
    """
    root_node = find_root_node(options, all_nodes)
    source_nodes = get_source_nodes(options, all_nodes, root_node)
    graph = jsapi.QueryGraph()

    # Parsed for parity with the timestamped variant; not used below.
    start_ts = parse_ts(options.start_ts)
    central_cube = define_raw_cube(graph, "global_coral_urls", root_node,
                                   overwrite=True)

    if not options.no_echo:
        pull_q = jsapi.DelayedSubscriber(graph, {},
                                         sort_order="-count",
                                         num_results=k)
        pull_q.set_cfg("rollup_levels", "0,0,1")
        pull_q.set_cfg("window_offset", 20 * 1000)  # but trailing by a few
        echo = jsapi.Echo(graph)
        echo.instantiate_on(root_node)
        graph.chain([central_cube, pull_q, echo])

    tput_merge = jsapi.MultiRoundCoord(graph)
    merge_settings = [
        ("wait_for_start", 10),
        ("num_results", k),
        ("sort_column", "-count"),
        # roll up time, response code and referer
        ("rollup_levels", "0,0,1"),
    ]
    for key, value in merge_settings:
        tput_merge.set_cfg(key, value)
    tput_merge.instantiate_on(root_node)
    graph.chain([tput_merge, central_cube])

    for node in source_nodes:
        local_cube = define_raw_cube(graph, "local_records", node,
                                     overwrite=False)
        pull_from_local = jsapi.MultiRoundClient(graph)
        pull_from_local.instantiate_on(node)
        graph.chain([local_cube, pull_from_local, tput_merge])

    deploy_or_dummy(options, server, graph)
예제 #24
0
    def test_reader(self):
        """A ClientDataReader must receive every tuple emitted by SendK."""
        graph = jsapi.QueryGraph()

        expected_count = 40
        sender = jsapi.SendK(graph, expected_count)

        client_reader = ClientDataReader()
        receive_addr = client_reader.prep_to_receive_data()
        graph.connectExternal(sender, receive_addr)

        self.make_local_worker()
        deploy_request = self.make_deploy_request(graph)
        self.validate_response(deploy_request)

        # validate SendK by counting
        received = []
        for tup in client_reader:
            received.append(tup)

        self.assertEquals(len(received), expected_count)
        print("client reader test succeeded")
예제 #25
0
 def test_CVSParse_validate_bad(self):
     """Chaining a CSVParse after an all-int CSVParse must fail validation."""
     graph = jsapi.QueryGraph()
     file_reader = jsapi.FileRead(graph, "file name")
     csv_types = "IIIII"
     first_parser = jsapi.CSVParse(graph, csv_types)
     # Expected to fail: the first parser's output schema starts with an int,
     # while CSVParse currently needs a string as its first element. If
     # CSVParse later supports parsing an arbitrarily indexed field, the
     # check would presumably look similar, e.g.:
     #   assert 'S' != csv_types[3]  # a real assert, not a test
     #   failing_parser = jsapi.CSVParse(graph, csv_types, field_to_parse=3)
     failing_parser = jsapi.CSVParse(graph, "DDSS")
     graph.connect(file_reader, first_parser)
     graph.connect(first_parser, failing_parser)
     validate = graph.validate_schemas
     self.assertRaises(SchemaError, validate)
     # Also check the message. assertRaisesRegexp is new in Python 2.7, so
     # this fails on 2.6 or earlier.
     self.assertRaisesRegexp(SchemaError, '[.\s]*requires a string[.\s]*',
                             validate)
    def test_1node_failure(self):
        """Start a small topology on one node, then stop and restart it twice."""
        outbound_addr = ("host", 123)
        listening_addr = ("host", 1235)

        controller = Controller(("", 0))
        controller.start_computation_async = lambda x: 0  # stub out

        # Add a node
        add_node(controller, outbound_addr, listening_addr)

        # Add a small topology
        graph = jsapi.QueryGraph()
        file_reader = jsapi.FileRead(graph, "file name")
        results_cube = graph.add_cube("local_results")
        results_cube.add_dim("hostname", Element.STRING, 0)
        results_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 1)
        graph.connect(file_reader, results_cube)

        request = ControlMessage()
        request.type = ControlMessage.ALTER
        graph.add_to_PB(request.alter.add())
        topology = request.alter[0]
        response = ControlMessage()
        controller.handle_alter(response, topology)
        if response.type != ControlMessage.OK:
            print(response.error_msg.msg)
        self.assertEquals(ControlMessage.OK, response.type)

        # Reuse the request as the node's ALTER_RESPONSE for the started
        # computation.
        request.type = ControlMessage.ALTER_RESPONSE
        query_planner.overwrite_comp_ids(topology, response.started_comp_id)
        controller.handle_alter_response(topology, outbound_addr)

        # Confirm topology started
        self.assertTrue('local_results' in controller.cube_locations)

        # Drop node
        outbound_addr = self.stop_and_start(controller, outbound_addr,
                                            listening_addr, request)

        print("stopping a second time.")
        self.stop_and_start(controller, outbound_addr, listening_addr,
                            request)
예제 #27
0
    def test_with_subscriber(self):
        """A cube with an attached subscriber must plan onto the single node."""
        only_node = ("host", 123)
        planner = QueryPlanner({only_node: only_node})

        graph = jsapi.QueryGraph()
        results_cube = graph.add_cube("local_results")
        results_cube.add_dim("hostname", Element.STRING, 0)
        results_cube.add_dim("time", Element.TIME, 1)
        results_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)
        results_cube.set_overwrite(True)  # fresh results

        hostname_filter = {"hostname": "http://foo.com"}
        subscriber = jsapi.TimeSubscriber(graph, hostname_filter, 1000)
        graph.connect(results_cube, subscriber)

        topology = graph.get_deploy_pb().alter[0]
        planner_error = planner.take_raw_topo(topology).lower()
        self.assertEquals(len(planner_error), 0)

        assignments = planner.get_assignments(1)
        self.assertTrue(only_node in assignments)
        self.assertEquals(len(assignments), 1)
예제 #28
0
    def test_cubeFilterSubscriber(self):
        """A FilterSubscriber fed by a cube and by a CSV chain must validate."""
        graph = jsapi.QueryGraph()

        rand_source = jsapi.RandSource(graph, 1, 2)

        results_cube = graph.add_cube("results")
        results_cube.add_dim("state", Element.STRING, 0)
        results_cube.add_agg("count", jsapi.Cube.AggType.COUNT, 2)

        filter_sub = jsapi.FilterSubscriber(graph, cube_field=2,
                                            level_in_field=0)
        # out-schema from the filter should be S,T, matching source
        extender = jsapi.ExtendOperator(graph, "i", ["a count"])
        evaluator = jsapi.RandEval(graph)

        cube_path = [rand_source, extender, results_cube, filter_sub,
                     evaluator]
        graph.chain(cube_path)

        # Second input into the same filter subscriber.
        file_reader = jsapi.FileRead(graph, "file name")
        csv_parser = jsapi.CSVParse(graph, types="I", fields_to_keep="all")
        graph.chain([file_reader, csv_parser, filter_sub])
        try:
            graph.validate_schemas()
        except SchemaError as schema_err:
            self.fail("should not throw, but got " + str(schema_err))
예제 #29
0
def main():
  """Build the query graph for the selected analysis mode and hand it off.

  The mode comes from ``--mode`` or, when that option is absent, from the
  first positional argument; giving both is an error. Each mode selects:

  * a function that defines the union ("internal") cube,
  * a function that extends each per-node chain from the raw-cube subscriber,
  * a post-processing step applied after the final subscriber, and
  * the ``rollup_levels`` string configured on the final subscriber.

  One chain is built per node (the union node is skipped unless
  ``options.generate_at_union`` is set), the chains are combined into
  the union cube via ``agg_tree``, and the finished graph is passed to
  ``deploy_or_dummy``.

  Usage errors and fatal conditions print a message and exit with status 1,
  so calling scripts can detect the failure. ``--wait`` is registered on the
  parser but not read inside this function.
  """
  parser = standard_option_parser()
  parser.add_option("--mode", dest="mode", action="store",
                    help="query to run. Should be one of %s" % MODE_LIST)
  parser.add_option("--wait", dest="wait", action="store",
                    help="how long to wait for results")
  (options, args) = parser.parse_args()

  # Single-argument print(...) behaves identically as a Python 2 print
  # statement, which is what the rest of this file uses.
  if options.mode:
    mode = options.mode
    if len(args) > 0:
      print("Can't specify mode as both an arg and an option.")
      sys.exit(1)
  else:
    if len(args) == 0:
      print("Must specify a mode. Should be one of %s" % MODE_LIST)
      sys.exit(1)
    mode = args[0]

  def passthrough(graph, op, opts):
    """No post-processing: the final subscriber is used as-is."""
    return op

  # Each branch yields (define_internal_cube, src_to_internal,
  # process_results, final_rollup_levels). The branches are kept lazy so that
  # only the helpers of the selected mode are looked up.
  if mode == "quantiles":
    pipeline = (quant_cube, src_to_quant, process_quant, "8,1")
  elif mode == "urls":
    # rollup time slightly, rest is unrolled.
    pipeline = (url_cube, src_to_url, passthrough, "8,1,1")
  elif mode == "domains":
    # rollup time slightly, rest is unrolled.
    pipeline = (url_cube, src_to_domain, passthrough, "8,1,1")
  elif mode == "domains_all":
    # NOTE(review): the original comment here also read "rollup time
    # slightly", which looks copy-pasted from the other modes -- confirm what
    # "1,1" means for the dom_notime cube.
    pipeline = (dom_notime, drop_time_from_doms, passthrough, "1,1")
  elif mode == "slow_reqs":
    # nothing rolled up.
    pipeline = (url_cube, src_slow_reqs, passthrough, "9,1,1")
  elif mode == "bad_domains":
    # rollup time slightly, rest is unrolled.
    pipeline = (bad_doms_cube, src_to_bad_doms, bad_doms_postprocess, "8,1,1")
  elif mode == "total_bw":
    # rollup time slightly, rest is unrolled.
    pipeline = (bw_cube, src_to_bw, passthrough, "8,1")
  elif mode == "bad_referers":
    # rollup time slightly, rest is unrolled.
    pipeline = (badreferrer_cube, src_to_badreferrer, badreferrer_out, "8,1")
  else:
    print("Unknown mode %s" % mode)
    sys.exit(1)

  (define_internal_cube, src_to_internal,
   process_results, final_rollup_levels) = pipeline

  all_nodes, server = get_all_nodes(options)
  if len(all_nodes) < 1:
    print("FATAL: no nodes")
    sys.exit(1)

  g = jsapi.QueryGraph()

  ops = []
  union_node = find_root_node(options, all_nodes)

  for node in all_nodes:
    if node == union_node and not options.generate_at_union:
      continue
    raw_cube = define_raw_cube(g, "local_records", node, overwrite=False)
    raw_cube_sub = jsapi.TimeSubscriber(g, {}, 1000)
    raw_cube_sub.set_cfg("simulation_rate", options.warp_factor)
    raw_cube_sub.set_cfg("ts_field", 0)
    if options.start_ts:
      raw_cube_sub.set_cfg("start_ts", options.start_ts)

    last_op = g.chain([raw_cube, raw_cube_sub])
    last_op = src_to_internal(g, last_op, node, options)
    last_op.instantiate_on(node)
    ops.append(last_op)

  if not ops:
    print("can't run, no [non-union] nodes")
    sys.exit(1)

  union_cube = define_internal_cube(g, "union_cube", union_node)

  g.agg_tree(ops, union_cube, start_ts=options.start_ts,
             sim_rate=options.warp_factor)

  if options.bw_cap:
    union_cube.set_inlink_bwcap(float(options.bw_cap))

  # This is the final output subscriber.
  pull_q = jsapi.TimeSubscriber(g, {}, 1000)  # only for UI purposes
  pull_q.set_cfg("ts_field", 0)
  if options.start_ts:
    pull_q.set_cfg("start_ts", options.start_ts)
  pull_q.set_cfg("rollup_levels", final_rollup_levels)
  pull_q.set_cfg("simulation_rate", options.warp_factor)
  pull_q.set_cfg("window_offset", 8 * 1000)  # ...trailing by a few

  g.connect(union_cube, pull_q)
  last_op = process_results(g, pull_q, options)

  echo = jsapi.Echo(g)
  echo.instantiate_on(union_node)
  g.connect(last_op, echo)

  deploy_or_dummy(options, server, g)
예제 #30
0
def get_graph(source_nodes, root_node, options):
    """Construct the two-phase query graph and return it.

    On the root node:
      * a central quantile cube is read by a time subscriber, whose output
        goes through a 0.95 Quantile operator (optionally via an Echo,
        unless ``options.no_echo`` is set) into a threshold cube;
      * a "global_anomalous" cube collects the filtered raw records.

    For every node in ``source_nodes``:
      * phase one reads ``options.fname``, parses it as CSV, time-warps it,
        stores it in a local raw cube and summarises one field into a local
        quantile cube, which is pulled into the central cube;
      * phase two routes the threshold cube through a pass-through subscriber
        into a per-node filter subscriber that sits between the local raw
        cube and the global results cube.

    Args:
      source_nodes: nodes on which the per-source chains are instantiated.
      root_node: node hosting the central, threshold and global cubes.
      options: parsed options; ``start_ts``, ``no_echo``, ``fname`` and
        ``warp_factor`` are read here.

    Returns:
      The populated ``jsapi.QueryGraph``.
    """
    graph = jsapi.QueryGraph()

    start_ts = parse_ts(options.start_ts)

    # ---- Root side: central quantile cube -> subscriber -> quantile op ----
    central_cube = graph.add_cube("global_coral_anamolous_quant")
    central_cube.instantiate_on(root_node)
    define_quant_cube(central_cube)

    central_sub = jsapi.TimeSubscriber(graph, {}, 1000)
    # rollup_levels and simulation_rate are intentionally not set here.
    central_sub_cfg = (
        ("ts_field", 0),
        ("start_ts", start_ts),
        ("window_offset", 6 * 1000),  # but trailing by a few
    )
    for cfg_key, cfg_value in central_sub_cfg:
        central_sub.set_cfg(cfg_key, cfg_value)

    quantile_op = jsapi.Quantile(graph, 0.95, field=1)

    graph.chain([central_cube, central_sub, quantile_op])

    thresh_cube = graph.add_cube("global_coral_anamalous_thresh")
    thresh_cube.add_dim("time", CubeSchema.Dimension.TIME_CONTAINMENT, 0)
    thresh_cube.add_agg("thresh", jsapi.Cube.AggType.COUNT, 1)
    thresh_cube.set_overwrite(True)
    thresh_cube.instantiate_on(root_node)

    if options.no_echo:
        graph.chain([quantile_op, thresh_cube])
    else:
        echo = jsapi.Echo(graph)
        echo.instantiate_on(root_node)
        graph.chain([quantile_op, echo, thresh_cube])

    # Offsets of the parsed fields kept in the raw cubes; the last entry is
    # len(coral_types), i.e. one past the parsed columns.
    kept_fields = ('timestamp', 'HTTP_stat', 'URL_requested', 'nbytes',
                   'dl_utime')
    parsed_field_offsets = [coral_fidxs[field] for field in kept_fields]
    parsed_field_offsets.append(len(coral_types))

    global_results = graph.add_cube("global_anomalous")
    define_schema_for_raw_cube(global_results, parsed_field_offsets)
    global_results.instantiate_on(root_node)

    filter_field = coral_fidxs['nbytes']
    for node in source_nodes:
        # ---- Phase one: load data and build the local cubes ----
        reader = jsapi.FileRead(graph, options.fname, skip_empty=True)
        csv_parser = jsapi.CSVParse(graph, coral_types)
        csv_parser.set_cfg("discard_off_size", "true")
        time_warp = jsapi.TimeWarp(graph, field=1, warp=options.warp_factor)
        time_warp.set_cfg("wait_for_catch_up", "true")
        reader.instantiate_on(node)

        local_raw_cube = graph.add_cube("local_coral_anamolous_all")
        define_schema_for_raw_cube(local_raw_cube, parsed_field_offsets)

        # Passes raw records through to the summary and the quantile cube.
        pass_raw = jsapi.FilterSubscriber(graph)
        to_summary = jsapi.ToSummary(graph, field=filter_field, size=100)

        local_q_cube = graph.add_cube("local_coral_anamolous_quant")
        quant_fields = [coral_fidxs['timestamp'], filter_field]
        define_quant_cube(local_q_cube, quant_fields)

        load_chain = [reader, csv_parser, time_warp, local_raw_cube,
                      pass_raw, to_summary, local_q_cube]
        graph.chain(load_chain)

        local_sub = jsapi.TimeSubscriber(graph, {}, 1000)
        local_sub.instantiate_on(node)
        local_sub.set_cfg("simulation_rate", 1)
        local_sub.set_cfg("ts_field", 0)
        local_sub.set_cfg("start_ts", start_ts)
        local_sub.set_cfg("window_offset", 2000)  # but trailing by a few

        local_q_cube.instantiate_on(node)
        # The subscriber is placed on the node a second time, as in the
        # original sequence of calls.
        local_sub.instantiate_on(node)
        graph.chain([local_q_cube, local_sub, central_cube])

        # ---- Phase two: push the threshold back and filter raw data ----
        passthrough = jsapi.FilterSubscriber(graph)
        passthrough.instantiate_on(root_node)

        thresh_filter = jsapi.FilterSubscriber(graph,
                                               cube_field=filter_field,
                                               level_in_field=1)
        thresh_filter.instantiate_on(node)
        graph.chain([thresh_cube, passthrough, thresh_filter])
        graph.chain([local_raw_cube, thresh_filter, global_results])

    return graph