def test_partition_query():
    with Cluster(command=CMD, workers=3) as cluster:
        q = Query(cluster, "partition-query")
        got = q.result()
        assert sorted(["state_partitions", "stateless_partitions"]) == sorted(got.keys())
        assert "initializer" in got["state_partitions"]["DummyState"]
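# The Query helper used throughout these tests comes from the integration
# harness. For orientation, here is a minimal sketch of an equivalent
# helper. It assumes the cluster exposes an external query TCP address
# (the attribute name `query_addrs` and the wire protocol of a
# newline-terminated query name answered with a JSON payload are
# assumptions for illustration, not the harness's actual implementation).

import json
import socket


def _query_sketch(addr, query_name, bufsize=4096):
    """Send a named query to 'host:port' and return the parsed JSON reply."""
    host, port = addr.split(':')
    conn = socket.create_connection((host, int(port)))
    try:
        conn.sendall(query_name.encode() + b'\n')  # assumed protocol
        chunks = []
        while True:
            chunk = conn.recv(bufsize)
            if not chunk:  # server closes the connection after replying
                break
            chunks.append(chunk)
    finally:
        conn.close()
    return json.loads(b''.join(chunks).decode())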
def test_state_entity_query():
    with Cluster(command=CMD, workers=2) as cluster:
        given_data_sent(cluster)
        got = Query(cluster, "state-entity-query").result()
        assert sorted(got.keys()) == [u'DummyState', u'PartitionedDummyState']
        assert got[u'DummyState'] == [u'key']
        assert len(got[u'PartitionedDummyState']) == 7
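# `given_data_sent` is a fixture-style helper from this module whose body is
# not shown. A plausible minimal sketch follows, assuming the app consumes
# framed messages (4-byte big-endian length prefix) and that seven distinct
# keys produce the seven PartitionedDummyState entities asserted above. The
# payload format and key names are assumptions for illustration only; it
# also assumes `import struct` and the harness's Sender and Reader.

def given_data_sent_sketch(cluster, n_keys=7):
    msgs = []
    for i in range(n_keys):
        payload = 'key_{}'.format(i).encode()
        msgs.append(struct.pack('>I', len(payload)) + payload)
    sender = Sender(cluster.source_addrs[0], Reader(iter(msgs)),
                    batch_size=1, interval=0.01, reconnect=True)
    cluster.add_sender(sender, start=True)
    cluster.wait_for_sender()  # block until everything has been sent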
def test_source_ids_query():
    HARDCODED_NO_OF_SOURCE_IDS = 10
    with Cluster(command=CMD, sources=1) as cluster:
        given_data_sent(cluster)
        q = Query(cluster, "source-ids-query")
        got = q.result()
        assert list(got.keys()) == ["source_ids"]
        assert len(got["source_ids"]) == HARDCODED_NO_OF_SOURCE_IDS
def test_cluster_status_query():
    with Cluster(command=CMD, workers=2) as cluster:
        q = Query(cluster, "cluster-status-query")
        assert q.result() == {
            u"processing_messages": True,
            u"worker_names": [u"initializer", u"worker1"],
            u"worker_count": 2}
def test_stateless_partition_query():
    with Cluster(command=CMD, workers=2) as cluster:
        got = Query(cluster, "stateless-partition-query").result()
        for (k, v) in got.items():
            assert int(k)
            assert sorted(v.keys()) == [u"initializer", u"worker1"]
            assert len(v[u"initializer"]) == 1
            assert int(v[u"initializer"][0])
            assert len(v[u"worker1"]) == 1
            assert int(v[u"worker1"][0])
def test_partition_count_query():
    with Cluster(command=CMD) as cluster:
        given_data_sent(cluster)
        got = Query(cluster, "partition-count-query").result()
        assert sorted(got.keys()) == ["state_partitions",
                                      "stateless_partitions"]
        assert got["state_partitions"] == {
            u"DummyState": {u"initializer": 1},
            u"PartitionedDummyState": {u"initializer": INPUT_ITEMS}}
        for (k, v) in got["stateless_partitions"].items():
            assert int(k)
            assert v == {u"initializer": 1}
def _run(persistent_data, res_ops, command, ops=[], initial=None, sources=1,
         partition_multiplier=1, validate_output=True, sender_mps=1000,
         sender_interval=0.01):
    host = '127.0.0.1'
    sinks = 1
    sink_mode = 'framed'
    batch_size = int(sender_mps * sender_interval)
    logging.debug("batch_size is {}".format(batch_size))

    if not isinstance(ops, (list, tuple)):
        raise TypeError("ops must be a list or tuple of operations")

    # If no initial workers value is given, determine the minimum number
    # required at the start so that the cluster never goes below 1 worker.
    # If a number is given, then verify it is sufficient.
    if ops:
        if isinstance(ops[0], Recover):
            raise ValueError("The first operation cannot be Recover")
        lowest = lowest_point(ops)
        if lowest < 1:
            min_workers = abs(lowest) + 1
        else:
            min_workers = 1
        if isinstance(initial, int):
            logging.debug('initial: {}'.format(initial))
            logging.debug('min: {}'.format(min_workers))
            assert initial >= min_workers
            workers = initial
        else:
            workers = min_workers
    else:  # Test is only for setup using initial workers
        assert initial > 0
        workers = initial

    logging.info("Initial cluster size: {}".format(workers))

    # NOTE: this deliberately overrides the partition_multiplier argument.
    partition_multiplier = 5  # Used in partition count creation
    # create the sequence generator and the reader
    msg = MultiSequenceGenerator(base_parts=workers * partition_multiplier - 1)

    # Start cluster
    logging.debug("Creating cluster")
    with Cluster(command=command, host=host, sources=sources,
                 workers=workers, sinks=sinks, sink_mode=sink_mode,
                 persistent_data=persistent_data) as cluster:

        # start senders
        for s in range(sources):
            sender = Sender(cluster.source_addrs[0], Reader(msg),
                            batch_size=batch_size,
                            interval=sender_interval, reconnect=True)
            cluster.add_sender(sender, start=True)

        # let the senders send some data first
        time.sleep(1)

        # loop over ops, keeping the result and passing it to the next op
        res = None
        assert not cluster.get_crashed_workers()
        for op in ops:
            res_ops.append(op)
            logging.info("Executing: {}".format(op))
            res = op.apply(cluster, res)
            assert not cluster.get_crashed_workers()

        # Wait a full second for things to calm down
        time.sleep(1)

        # If using external senders, wait for them to stop cleanly
        if cluster.senders:
            # Tell the multi-sequence-sender to stop
            msg.stop()

            # wait for senders to reach the end of their readers and stop
            for s in cluster.senders:
                cluster.wait_for_sender(s)

            # Validate all sender values caught up
            stop_value = max(msg.seqs)
            t0 = time.time()
            while True:
                try:
                    assert len(msg.seqs) == msg.seqs.count(stop_value)
                    break
                except AssertionError:
                    if time.time() - t0 > 2:
                        logging.error("msg.seqs aren't all equal: {}"
                                      .format(msg.seqs))
                        raise
                time.sleep(0.1)

            # Create await_values for the sink based on the stop values from
            # the multi sequence generator
            await_values = []
            for part, val in enumerate(msg.seqs):
                key = '{:07d}'.format(part).encode()
                data = '[{},{},{},{}]'.format(
                    *[val - x for x in range(3, -1, -1)]).encode()
                await_values.append((key, data))
            cluster.sink_await(values=await_values, func=parse_sink_value)

        logging.info("Completion condition achieved. Shutting down cluster.")

        # Use validator to validate the data in at-least-once mode
        # save sink data to a file
        if validate_output:
            # TODO: move to validations.py
            out_file = os.path.join(cluster.res_dir, 'received.txt')
            cluster.sinks[0].save(out_file)

            # Validate captured output
            logging.info("Validating output")
            # if senders == 0, using internal source
            if cluster.senders:
                cmd_validate = ('validator -i {out_file} -e {expect} -a'
                                .format(out_file=out_file, expect=stop_value))
            else:
                cmd_validate = ('validator -i {out_file} -a'
                                .format(out_file=out_file))
            res = run_shell_cmd(cmd_validate)
            try:
                assert res.success
                logging.info("Validation successful")
            except AssertionError:
                raise AssertionError('Validation failed with the following '
                                     'error:\n{}'.format(res.output))

        # Validate worker actually underwent recovery
        if cluster.restarted_workers:
            # TODO: move to validations.py
            logging.info("Validating recovery")
            pattern = r"RESILIENCE\: Replayed \d+ entries from recovery log file\."
            for r in cluster.restarted_workers:
                stdout = r.get_output()
                try:
                    assert re.search(pattern, stdout) is not None
                    logging.info("{} recovered successfully".format(r.name))
                except AssertionError:
                    raise AssertionError(
                        'Worker {} does not appear to have performed '
                        'recovery as expected.'.format(r.name))
def test_state_entity_count_query():
    with Cluster(command=CMD, workers=2) as cluster:
        given_data_sent(cluster)
        q = Query(cluster, "state-entity-count-query")
        assert q.result() == {u'DummyState': 1, u'PartitionedDummyState': 7}
def test_stateless_partition_count_query():
    with Cluster(command=CMD, workers=2) as cluster:
        got = Query(cluster, "stateless-partition-count-query").result()
        for (k, v) in got.items():
            assert int(k)
            assert v == {u"initializer": 1, u"worker1": 1}
def _run(command, persistent_data):
    host = '127.0.0.1'
    sources = 1
    sinks = 1
    sink_mode = 'framed'
    workers = 2
    expect = 200
    last_value_0 = '[{}]'.format(','.join(
        (str(expect - v) for v in range(6, -2, -2)))).encode()
    last_value_1 = '[{}]'.format(','.join(
        (str(expect - 1 - v) for v in range(6, -2, -2)))).encode()
    await_values = (struct.pack('>I', len(last_value_0)) + last_value_0,
                    struct.pack('>I', len(last_value_1)) + last_value_1)

    # Start cluster
    with Cluster(command=command, host=host, sources=sources,
                 workers=workers, sinks=sinks, sink_mode=sink_mode,
                 persistent_data=persistent_data) as cluster:

        # Create sender
        logging.debug("Creating sender")
        sender = Sender(cluster.source_addrs[0],
                        Reader(sequence_generator(expect)),
                        batch_size=1, interval=0.05, reconnect=True)
        cluster.add_sender(sender, start=True)

        # wait for some data to go through the system
        time.sleep(0.5)

        # stop worker in a non-graceful fashion so that recovery files
        # aren't removed
        logging.debug("Killing worker")
        killed = cluster.kill_worker(worker=-1)

        # restart worker
        logging.debug("Restarting worker")
        cluster.restart_worker(killed)

        # wait until sender completes (~1 second)
        logging.debug("Waiting for sender to complete")
        cluster.wait_for_sender()

        # Wait for the last sent value expected at the worker
        logging.debug("Waiting for sink to complete")
        cluster.sink_await(await_values)

        # stop the cluster
        logging.debug("Stopping cluster")
        cluster.stop_cluster()

    logging.debug("validating restarted worker stdout")
    # Validate worker actually underwent recovery
    pattern_restarting = "Restarting a listener ..."
    stdout = persistent_data['runner_data'][2].stdout
    try:
        assert re.search(pattern_restarting, stdout) is not None
    except AssertionError:
        raise AssertionError('Worker does not appear to have reconnected '
                             'as expected. Worker output is '
                             'included below.\nSTDOUT\n---\n%s' % stdout)
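# `sequence_generator(expect)` feeds the Reader in both recovery tests. A
# minimal sketch follows, assuming the app consumes framed 64-bit
# big-endian integers counting up from 1 to `expect`. The frame layout is
# inferred from the framed sink values awaited above, not taken from the
# harness itself:

def sequence_generator_sketch(stop, start=1):
    for i in range(start, stop + 1):
        # 4-byte length prefix (always 8) followed by the 8-byte value
        yield struct.pack('>IQ', 8, i)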
def _run(command, runner_data=[]):
    host = '127.0.0.1'
    sources = 1
    sinks = 1
    sink_mode = 'framed'
    workers = 2
    expect = 2000
    last_value_0 = '[{}]'.format(','.join(
        (str(expect - v) for v in range(6, -2, -2)))).encode()
    last_value_1 = '[{}]'.format(','.join(
        (str(expect - 1 - v) for v in range(6, -2, -2)))).encode()
    await_values = (struct.pack('>I', len(last_value_0)) + last_value_0,
                    struct.pack('>I', len(last_value_1)) + last_value_1)

    # Start cluster
    with Cluster(command=command, host=host, sources=sources,
                 workers=workers, sinks=sinks, sink_mode=sink_mode,
                 runner_data=runner_data) as cluster:

        # Create sender
        logging.debug("Creating sender")
        sender = Sender(cluster.source_addrs[0],
                        Reader(sequence_generator(expect)),
                        batch_size=100, interval=0.05, reconnect=True)
        cluster.add_sender(sender, start=True)

        # wait for some data to go through the system
        time.sleep(0.2)

        # stop worker in a non-graceful fashion so that recovery files
        # aren't removed
        logging.debug("Killing worker")
        killed = cluster.kill_worker(worker=-1)

        # restart worker
        logging.debug("Restarting worker")
        cluster.restart_worker(killed)

        # wait until sender completes (~1 second)
        logging.debug("Waiting for sender to complete")
        cluster.wait_for_sender()

        # Wait for the last sent value expected at the worker
        logging.debug("Waiting for sink to complete")
        cluster.sink_await(await_values)

        # stop the cluster
        logging.debug("Stopping cluster")
        cluster.stop_cluster()

        # Use validator to validate the data in at-least-once mode
        # save sink data to a file
        out_file = os.path.join(cluster.res_dir, 'received.txt')
        cluster.sinks[0].save(out_file, mode='giles')

        # Validate captured output
        logging.debug("Validating output")
        cmd_validate = ('validator -i {out_file} -e {expect} -a'
                        .format(out_file=out_file, expect=expect))
        res = run_shell_cmd(cmd_validate)
        try:
            assert res.success
        except AssertionError:
            raise AssertionError('Output validation failed with the '
                                 'following error:\n{}'.format(res.output))

        # Validate worker actually underwent recovery
        logging.debug("Validating recovery from worker stdout")
        pattern = r"RESILIENCE\: Replayed \d+ entries from recovery log file\."
        try:
            assert re.search(pattern,
                             cluster.runners[-1].get_output()) is not None
        except AssertionError:
            raise AssertionError("Worker does not appear to have performed "
                                 "recovery as expected.")
def _autoscale_run(command, ops=[], cycles=1, initial=None, runner_data=[],
                   as_steps=[]):
    host = '127.0.0.1'
    sources = 1
    sinks = 1
    sink_mode = 'framed'

    if isinstance(ops, int):
        ops = [ops]

    # If no initial workers value is given, determine the minimum number
    # required at the start so that the cluster never goes below 1 worker.
    # If a number is given, then verify it is sufficient.
    if ops:
        lowest = lowest_point(ops * cycles)
        if lowest < 1:
            min_workers = abs(lowest) + 1
        else:
            min_workers = 1
        if isinstance(initial, int):
            assert initial >= min_workers
            workers = initial
        else:
            workers = min_workers
    else:  # Test is only for setup using initial workers
        assert initial > 0
        workers = initial

    batch_size = 10
    interval = 0.05
    lowercase2 = [a + b for a in lowercase for b in lowercase]
    char_cycle = cycle(lowercase2)
    expected = Counter()

    def count_sent(s):
        expected[s] += 1

    reader = Reader(
        iter_generator(items=char_cycle,
                       to_string=lambda s: pack('>2sI', s, 1),
                       on_next=count_sent))

    # Start cluster
    logging.debug("Creating cluster")
    with Cluster(command=command, host=host, sources=sources,
                 workers=workers, sinks=sinks, sink_mode=sink_mode,
                 runner_data=runner_data) as cluster:

        # Create sender
        logging.debug("Creating sender")
        sender = Sender(cluster.source_addrs[0], reader,
                        batch_size=batch_size, interval=interval,
                        reconnect=True)
        cluster.add_sender(sender, start=True)

        # wait for some data to go through the system
        time.sleep(1)

        # Perform autoscale cycles
        logging.debug("Starting autoscale cycles")
        for cyc in range(cycles):
            for joiners in ops:
                # Verify cluster is processing before proceeding
                cluster.wait_to_resume_processing(timeout=120)

                # Test for crashed workers
                assert not cluster.get_crashed_workers()

                # get partition data before autoscale operation begins
                logging.debug("Get partition data before autoscale event")
                pre_partitions = cluster.get_partition_data()
                as_steps.append(joiners)
                joined = []
                left = []

                if joiners > 0:  # autoscale: grow
                    # create new workers and have them join
                    logging.debug("grow by {}".format(joiners))
                    joined = cluster.grow(by=joiners)
                elif joiners < 0:  # autoscale: shrink
                    # choose the most recent, still-alive runners to leave
                    leavers = abs(joiners)
                    left = cluster.shrink(leavers)
                else:  # Handle the 0 case as a noop
                    continue

                # Wait until all live workers report 'ready'
                cluster.wait_to_resume_processing(timeout=120)

                # Test for crashed workers
                assert not cluster.get_crashed_workers()

                # Wait a second before the next operation, allowing some
                # more data to go through the system
                time.sleep(1)
                logging.debug("end of autoscale iteration")
            logging.debug("End of autoscale cycle")
        logging.debug("End of autoscale events. Entering final validation")
        time.sleep(2)

        # Test for crashed workers
        logging.debug("check for crashed")
        assert not cluster.get_crashed_workers()

        # Test is done, so stop sender
        cluster.stop_senders()

        # wait until sender sends out its final batch and exits
        cluster.wait_for_sender()

        logging.info('Sender sent {} messages'.format(sum(expected.values())))

        # Use Sink value to determine when to stop runners and sink
        pack677 = '>I2sQ'
        await_values = [pack(pack677, calcsize(pack677) - 4, c, v)
                        for c, v in expected.items()]
        cluster.sink_await(await_values, timeout=120)

        # validate output
        phase_validate_output(cluster.sinks[0].data, expected)
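# `phase_validate_output` checks the sink's records against the per-key
# counts accumulated in `expected`. A minimal sketch, assuming each sink
# record uses the same '>I2sQ' frame as the awaited values above (4-byte
# length prefix, 2-byte key, 64-bit running count), that the last record
# per key carries the final count, and that `unpack` is imported from
# struct alongside `pack` and `calcsize`:

def phase_validate_output_sketch(sink_data, expected):
    got = {}
    for raw in sink_data:
        _, key, count = unpack('>I2sQ', raw)
        got[key] = count  # later records overwrite earlier running counts
    assert got == dict(expected), \
        "sink counts {} != expected {}".format(got, dict(expected))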