Example #1
def _test_log_rotation_external_trigger_no_recovery(command):
    host = '127.0.0.1'
    sources = 1
    workers = 2
    res_dir = '/tmp/res-data'
    expect = 2000
    last_value_0 = '[{}]'.format(','.join(
        (str(expect - v) for v in range(6, -2, -2))))
    last_value_1 = '[{}]'.format(','.join(
        (str(expect - 1 - v) for v in range(6, -2, -2))))
    await_values = (struct.pack('>I', len(last_value_0)) + last_value_0,
                    struct.pack('>I', len(last_value_1)) + last_value_1)

    setup_resilience_path(res_dir)

    command = '''{} \
        --log-rotation \
        --stop-pause {}
    '''.format(command, STOP_THE_WORLD_PAUSE)
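    # --log-rotation and --stop-pause are command-line options of the worker
    # binary under test; STOP_THE_WORLD_PAUSE is presumably a module-level
    # constant in the harness that sets the stop-the-world pause duration.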

    runners = []
    try:
        # Create sink, metrics, reader, sender
        sink = Sink(host)
        metrics = Metrics(host)
        reader = Reader(sequence_generator(expect))

        # Start sink and metrics, and get their connection info
        sink.start()
        sink_host, sink_port = sink.get_connection_info()
        outputs = '{}:{}'.format(sink_host, sink_port)

        metrics.start()
        metrics_host, metrics_port = metrics.get_connection_info()
        time.sleep(0.05)

        input_ports, control_port, external_port, data_port = (get_port_values(
            host, sources))
        inputs = ','.join(['{}:{}'.format(host, p) for p in input_ports])

        start_runners(runners, command, host, inputs, outputs, metrics_port,
                      control_port, external_port, data_port, res_dir, workers)

        # Wait for first runner (initializer) to report application ready
        runner_ready_checker = RunnerReadyChecker(runners[0], timeout=30)
        runner_ready_checker.start()
        runner_ready_checker.join()
        if runner_ready_checker.error:
            raise runner_ready_checker.error

        # start sender
        sender = Sender(host,
                        input_ports[0],
                        reader,
                        batch_size=100,
                        interval=0.05)
        sender.start()

        time.sleep(0.5)
        # Trigger log rotation with external message
        cmd_external_trigger = ('external_sender -e {}:{} -t rotate-log -m '
                                'worker1'.format(host, external_port))

        success, stdout, retcode, cmd = ex_validate(cmd_external_trigger)
        try:
            assert (success)
        except AssertionError:
            raise AssertionError('External rotation trigger failed with '
                                 'the error:\n{}'.format(stdout))

        # wait until sender completes (~1 second)
        sender.join(30)
        if sender.error:
            raise sender.error
        if sender.is_alive():
            sender.stop()
            raise TimeoutError('Sender did not complete in the expected '
                               'period')

        # Wait for the expected final values to arrive at the sink before
        # stopping runners and sink
        stopper = SinkAwaitValue(sink, await_values, AWAIT_TIMEOUT)
        stopper.start()
        stopper.join()
        if stopper.error:
            for r in runners:
                print r.name
                print r.get_output()[0]
                print '---'
            raise stopper.error

        # stop application workers
        for r in runners:
            r.stop()

        # Stop sink
        sink.stop()
        print 'sink.data size: ', len(sink.data)

        # Use validator to validate the data in at-least-once mode
        # save sink data to a file
        out_file = os.path.join(res_dir, 'received.txt')
        sink.save(out_file, mode='giles')

        # Validate captured output
        cmd_validate = ('validator -i {out_file} -e {expect} -a'.format(
            out_file=out_file, expect=expect))
        success, stdout, retcode, cmd = ex_validate(cmd_validate)
        try:
            assert (success)
        except AssertionError:
            print runners[0].name
            print runners[0].get_output()[0]
            print '---'
            print runners[1].name
            print runners[1].get_output()[0]
            print '---'
            raise AssertionError('Validation failed with the following '
                                 'error:\n{}'.format(stdout))

        # Validate that each worker except the initializer underwent log rotation
        for r in runners[1:]:
            stdout, stderr = r.get_output()
            try:
                assert (re.search(log_rotated_pattern, stdout, re.M | re.S)
                        is not None)
            except AssertionError:
                raise AssertionError('Worker %r does not appear to have '
                                     'performed log rotation as expected.'
                                     ' The pattern %r '
                                     'is missing from the Worker output '
                                     'included below.\nSTDOUT\n---\n%s\n'
                                     '---\n' %
                                     (r.name, log_rotated_pattern, stdout))
    finally:
        for r in runners:
            r.stop()
        clean_up_resilience_path(res_dir)
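
The await_values construction at the top of these tests pairs each expected final payload with a 4-byte big-endian length prefix. Below is a minimal standalone sketch (Python 3 syntax, standard library only, not part of the test harness) of what those frames contain for expect = 2000, as used in Examples #1 and #2.

import struct

expect = 2000
# range(6, -2, -2) yields 6, 4, 2, 0, so each expected payload is the last
# four values of its stream: evens ending at expect, odds ending at expect - 1.
last_value_0 = '[{}]'.format(','.join(str(expect - v) for v in range(6, -2, -2)))
last_value_1 = '[{}]'.format(','.join(str(expect - 1 - v) for v in range(6, -2, -2)))
print(last_value_0)  # [1994,1996,1998,2000]
print(last_value_1)  # [1993,1995,1997,1999]

# Each awaited value is the payload preceded by a 4-byte big-endian length,
# which is the framing the test expects to find in the sink's captured data.
frame = struct.pack('>I', len(last_value_0)) + last_value_0.encode()
length = struct.unpack('>I', frame[:4])[0]
print(length)      # 21
print(frame[4:])   # b'[1994,1996,1998,2000]'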
Example #2
def _test_recovery(command):
    host = '127.0.0.1'
    sources = 1
    workers = 2
    res_dir = '/tmp/res-data'
    expect = 2000
    last_value_0 = '[{}]'.format(','.join(
        (str(expect - v) for v in range(6, -2, -2))))
    last_value_1 = '[{}]'.format(','.join(
        (str(expect - 1 - v) for v in range(6, -2, -2))))

    await_values = (struct.pack('>I', len(last_value_0)) + last_value_0,
                    struct.pack('>I', len(last_value_1)) + last_value_1)

    setup_resilience_path(res_dir)

    runners = []
    try:
        # Create sink, metrics, reader, sender
        sink = Sink(host)
        metrics = Metrics(host)
        reader = Reader(sequence_generator(expect))

        # Start sink and metrics, and get their connection info
        sink.start()
        sink_host, sink_port = sink.get_connection_info()
        outputs = '{}:{}'.format(sink_host, sink_port)

        metrics.start()
        metrics_host, metrics_port = metrics.get_connection_info()
        time.sleep(0.05)

        input_ports, control_port, external_port, data_port = (get_port_values(
            host, sources))
        inputs = ','.join(['{}:{}'.format(host, p) for p in input_ports])

        start_runners(runners, command, host, inputs, outputs, metrics_port,
                      control_port, external_port, data_port, res_dir, workers)

        # Wait for first runner (initializer) to report application ready
        runner_ready_checker = RunnerReadyChecker(runners[0], timeout=30)
        runner_ready_checker.start()
        runner_ready_checker.join()
        if runner_ready_checker.error:
            raise runner_ready_checker.error

        # start sender
        sender = Sender(host,
                        input_ports[0],
                        reader,
                        batch_size=100,
                        interval=0.05)
        sender.start()
        time.sleep(0.2)

        # stop worker
        runners[-1].stop()

        ## restart worker
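        # respawn() presumably builds a fresh runner with the same
        # configuration; the old, stopped runner is kept in `runners` so its
        # output can still be inspected during validation and cleanup.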
        runners.append(runners[-1].respawn())
        runners[-1].start()

        # wait until sender completes (~1 second)
        sender.join(5)
        if sender.error:
            raise sender.error
        if sender.is_alive():
            sender.stop()
            raise TimeoutError('Sender did not complete in the expected '
                               'period')

        # Wait for the expected final values to arrive at the sink before
        # stopping runners and sink
        stopper = SinkAwaitValue(sink, await_values, 30)
        stopper.start()
        stopper.join()
        if stopper.error:
            raise stopper.error

        # stop application workers
        for r in runners:
            r.stop()

        # Stop sink
        sink.stop()
        print 'sink.data size: ', len(sink.data)

        # Use validator to validate the data in at-least-once mode
        # save sink data to a file
        out_file = os.path.join(res_dir, 'received.txt')
        sink.save(out_file, mode='giles')

        # Validate captured output
        cmd_validate = ('validator -i {out_file} -e {expect} -a'.format(
            out_file=out_file, expect=expect))
        success, stdout, retcode, cmd = ex_validate(cmd_validate)
        try:
            assert (success)
        except AssertionError:
            print runners[-1].get_output()[0]
            print '---'
            print runners[-2].get_output()[0]
            print '---'
            raise AssertionError('Validation failed with the following '
                                 'error:\n{}'.format(stdout))

        # Validate worker actually underwent recovery
        pattern = "RESILIENCE\: Replayed \d+ entries from recovery log file\."
        stdout, stderr = runners[-1].get_output()
        try:
            assert (re.search(pattern, stdout) is not None)
        except AssertionError:
            raise AssertionError('Worker does not appear to have performed '
                                 'recovery as expected. Worker output is '
                                 'included below.\nSTDOUT\n---\n%s\n---\n'
                                 'STDERR\n---\n%s' % (stdout, stderr))

    finally:
        for r in runners:
            r.stop()
        clean_up_resilience_path(res_dir)
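
The final assertion above only checks that the restarted worker's stdout contains a line matching the RESILIENCE pattern. Here is a standalone sketch of that check; the sample text is constructed to match the pattern and is not real worker output.

import re

pattern = r"RESILIENCE: Replayed \d+ entries from recovery log file\."
# Sample text shaped like the pattern above; in the test, the text comes
# from runners[-1].get_output().
sample_stdout = ("... worker startup ...\n"
                 "RESILIENCE: Replayed 42 entries from recovery log file.\n"
                 "... resumed processing ...\n")

match = re.search(pattern, sample_stdout)
assert match is not None
print(match.group(0))  # RESILIENCE: Replayed 42 entries from recovery log file.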
Example #3
def _test_restart(command):

    host = '127.0.0.1'
    sources = 1
    workers = 2
    res_dir = '/tmp/res-data'
    expect = 200
    last_value_0 = '[{}]'.format(','.join(
        (str(expect - v) for v in range(6, -2, -2))))
    last_value_1 = '[{}]'.format(','.join(
        (str(expect - 1 - v) for v in range(6, -2, -2))))
    await_values = (struct.pack('>I', len(last_value_0)) + last_value_0,
                    struct.pack('>I', len(last_value_1)) + last_value_1)

    setup_resilience_path(res_dir)

    runners = []
    try:
        # Create sink, metrics, reader, sender
        sink = Sink(host)
        metrics = Metrics(host)
        reader = Reader(sequence_generator(expect))

        # Start sink and metrics, and get their connection info
        sink.start()
        sink_host, sink_port = sink.get_connection_info()
        outputs = '{}:{}'.format(sink_host, sink_port)

        metrics.start()
        metrics_host, metrics_port = metrics.get_connection_info()
        time.sleep(0.05)

        input_ports, control_port, external_port, data_port = (get_port_values(
            host, sources))
        inputs = ','.join(['{}:{}'.format(host, p) for p in input_ports])

        start_runners(runners, command, host, inputs, outputs, metrics_port,
                      control_port, external_port, data_port, res_dir, workers)

        # Wait for first runner (initializer) to report application ready
        runner_ready_checker = RunnerReadyChecker(runners[0], timeout=30)
        runner_ready_checker.start()
        runner_ready_checker.join()
        if runner_ready_checker.error:
            raise runner_ready_checker.error

        # start sender
        sender = Sender(host,
                        input_ports[0],
                        reader,
                        batch_size=1,
                        interval=0.05,
                        reconnect=True)
        sender.start()
        time.sleep(0.2)

        # stop worker
        runners[-1].stop()

        ## restart worker
        runners.append(runners[-1].respawn())
        runners[-1].start()

        # wait until sender completes (~10 seconds: 200 values at one per 0.05s)
        sender.join(30)
        if sender.error:
            raise sender.error
        if sender.is_alive():
            sender.stop()
            raise TimeoutError('Sender did not complete in the expected '
                               'period')

        # Wait for the last sent values to arrive at the sink
        stopper = SinkAwaitValue(sink, await_values, 30)
        stopper.start()
        stopper.join()
        if stopper.error:
            for r in runners:
                print r.name
                print r.get_output()[0]
                print '---'
            print 'sink data'
            print sink.data
            print '---'
            raise stopper.error

        # stop application workers
        for r in runners:
            r.stop()

        # Stop sink
        sink.stop()

        # Validate the restarted worker actually reconnected its listeners
        pattern_restarting = "Restarting a listener ..."
        stdout, stderr = runners[-1].get_output()
        try:
            assert (re.search(pattern_restarting, stdout) is not None)
        except AssertionError:
            raise AssertionError('Worker does not appear to have reconnected '
                                 'as expected. Worker output is '
                                 'included below.\nSTDOUT\n---\n%s\n---\n'
                                 'STDERR\n---\n%s' % (stdout, stderr))
    finally:
        for r in runners:
            r.stop()
        clean_up_resilience_path(res_dir)
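
All three tests gate shutdown on SinkAwaitValue, which comes from the test harness. The hypothetical await_sink_values below is only a sketch of the underlying idea, assuming the sink exposes its captured frames as a list: poll until every awaited frame has arrived, or give up after a timeout.

import time

def await_sink_values(sink_data, awaited, timeout=30, poll_interval=0.1):
    """Block until every frame in `awaited` appears in `sink_data` (a list
    the sink keeps appending to), or raise once `timeout` has elapsed."""
    remaining = set(awaited)
    deadline = time.time() + timeout
    while time.time() < deadline:
        remaining = set(f for f in remaining if f not in sink_data)
        if not remaining:
            return
        time.sleep(poll_interval)
    # TimeoutError is a builtin on Python 3; the harness above presumably
    # provides an equivalent under Python 2.
    raise TimeoutError('still waiting on {} frame(s) after {}s'
                       .format(len(remaining), timeout))

In the tests above the same idea runs on its own thread (stopper.start() / stopper.join()) so that, on failure, the runners' output can still be printed before the error is re-raised.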