def test_daemon(self):
    """Verify daemon_test() can daemonize and that it exits once its
    flag file is removed."""
    pidfile = os.path.join(self.test_root, "daemon.pid")
    flagfile = os.path.join(self.test_root, "daemon.flag")
    # Create the flag file; the daemon runs until it disappears.
    open(flagfile, 'w').close()
    if not os.fork():
        # Child process: drop the inherited test cleanups so they don't
        # run in this process, then daemonize.
        self._cleanups = []
        daemon_test(pidfile, flagfile)
    # Parent: wait for the daemon to write its pidfile...
    for x in iterate_timeout(30, "daemon to start"):
        if os.path.exists(pidfile):
            break
    # ...then signal it to stop by removing the flag file and wait for
    # the pidfile to be cleaned up.
    os.unlink(flagfile)
    for x in iterate_timeout(30, "daemon to stop"):
        if not os.path.exists(pidfile):
            break
def test_bubblewrap_leak(self):
    """Ensure a process started inside bubblewrap does not outlive it.

    A background ``sleep`` is started (and disowned) inside the
    bubblewrap context; after the wrapped command exits, /proc is
    polled until no process with that command line remains.
    """
    bwrap = bubblewrap.BubblewrapDriver()
    context = bwrap.getExecutionContext()
    work_dir = tempfile.mkdtemp()
    ansible_dir = tempfile.mkdtemp()
    ssh_agent = SshAgent()
    self.addCleanup(ssh_agent.stop)
    ssh_agent.start()
    po = context.getPopen(work_dir=work_dir,
                          ansible_dir=ansible_dir,
                          ssh_auth_sock=ssh_agent.env['SSH_AUTH_SOCK'])
    leak_time = 60
    # Use hexadecimal notation to avoid false-positive
    true_proc = po(['bash', '-c', 'sleep 0x%X & disown' % leak_time])
    self.assertEqual(0, true_proc.wait())
    # Arguments in /proc/<pid>/cmdline are NUL separated.
    cmdline = "sleep\x000x%X\x00" % leak_time
    for x in iterate_timeout(30, "process to exit"):
        try:
            sleep_proc = []
            for pid in os.listdir("/proc"):
                cmdline_file = "/proc/%s/cmdline" % pid
                if not os.path.isfile(cmdline_file):
                    continue
                # Close the cmdline file promptly instead of leaking
                # the descriptor until garbage collection.
                with open(cmdline_file) as f:
                    if f.read() == cmdline:
                        sleep_proc.append(pid)
            if not sleep_proc:
                break
        except FileNotFoundError:
            # A process exited while we were scanning; retry the scan.
            pass
        except ProcessLookupError:
            pass
        time.sleep(1)
def daemon_test(pidfile, flagfile):
    # Helper for test_daemon: daemonize (holding a PID lock file with a
    # 10 second acquisition timeout), then run until flagfile is
    # removed; exit cleanly afterwards.
    pid = pid_file_module.TimeoutPIDLockFile(pidfile, 10)
    with daemon.DaemonContext(pidfile=pid):
        for x in iterate_timeout(30, "flagfile to be removed"):
            if not os.path.exists(flagfile):
                break
    sys.exit(0)
def test_bubblewrap_leak(self):
    """Ensure a process started inside bubblewrap does not outlive it."""
    bwrap = bubblewrap.BubblewrapDriver()
    context = bwrap.getExecutionContext()
    work_dir = tempfile.mkdtemp()
    ansible_dir = tempfile.mkdtemp()
    ssh_agent = SshAgent()
    self.addCleanup(ssh_agent.stop)
    ssh_agent.start()
    po = context.getPopen(work_dir=work_dir,
                          ansible_dir=ansible_dir,
                          ssh_auth_sock=ssh_agent.env['SSH_AUTH_SOCK'])
    leak_time = 60
    # Use hexadecimal notation to avoid false-positive
    true_proc = po(['bash', '-c', 'sleep 0x%X & disown' % leak_time])
    self.assertEqual(0, true_proc.wait())
    # Arguments in /proc/<pid>/cmdline are NUL separated.
    cmdline = "sleep\x000x%X\x00" % leak_time
    for x in iterate_timeout(30, "process to exit"):
        try:
            # NOTE(review): open() here relies on garbage collection to
            # close the /proc file handles — consider a context manager.
            sleep_proc = [pid for pid in os.listdir("/proc")
                          if os.path.isfile("/proc/%s/cmdline" % pid)
                          and open("/proc/%s/cmdline" % pid).read() == cmdline]
            if not sleep_proc:
                break
        except FileNotFoundError:
            # A process exited while we were scanning; retry the scan.
            pass
        except ProcessLookupError:
            pass
        time.sleep(1)
def test_dequeue(self):
    """Test that the Web client can dequeue a change"""
    # Hold jobs so the periodic build stays queued long enough for the
    # zuul-client dequeue command to act on it.
    self.executor_server.hold_jobs_in_build = True
    start_builds = len(self.builds)
    self.create_branch('org/project', 'stable')
    # (A second, redundant hold_jobs_in_build assignment was removed
    # here; the flag is already set above.)
    self.commitConfigUpdate('common-config', 'layouts/timer.yaml')
    self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
    self.waitUntilSettled()

    for _ in iterate_timeout(30, 'Wait for a build on hold'):
        if len(self.builds) > start_builds:
            break
    self.waitUntilSettled()

    # Build an admin token for tenant-one so the dequeue is authorized.
    authz = {'iss': 'zuul_operator',
             'aud': 'zuul.example.com',
             'sub': 'testuser',
             'zuul': {
                 'admin': ['tenant-one', ]
             },
             'exp': time.time() + 3600}
    # NOTE(review): .decode() assumes jwt.encode returns bytes (PyJWT
    # < 2.0); PyJWT >= 2.0 returns str — confirm the pinned version.
    token = jwt.encode(authz, key='NoDanaOnlyZuul',
                       algorithm='HS256').decode('utf-8')
    p = subprocess.Popen(
        ['zuul-client',
         '--zuul-url', self.base_url,
         '--auth-token', token,
         '-v',
         'dequeue',
         '--tenant', 'tenant-one',
         '--project', 'org/project',
         '--pipeline', 'periodic',
         '--ref', 'refs/heads/stable'],
        stdout=subprocess.PIPE)
    output = p.communicate()
    self.assertEqual(p.returncode, 0, output)
    self.waitUntilSettled()

    # Remove the timer trigger so no new periodic builds start.
    self.commitConfigUpdate('common-config', 'layouts/no-timer.yaml')
    self.scheds.execute(lambda app: app.sched.reconfigure(app.config))
    self.waitUntilSettled()
    self.executor_server.hold_jobs_in_build = False
    self.executor_server.release()
    self.waitUntilSettled()
    # Exactly one build should have been aborted by the dequeue.
    self.assertEqual(self.countJobResults(self.history, 'ABORTED'), 1)
def runFingerClient(self, build_uuid, gateway_address, event):
    """Connect to the finger gateway and stream a build's log.

    Appends the streamed text to self.streaming_data and sets *event*
    once the connection is established and the request has been sent.
    """
    # Wait until the gateway is started
    for x in iterate_timeout(30, "finger client to start"):
        try:
            # NOTE(Shrews): This causes the gateway to begin to handle
            # a request for which it never receives data, and thus
            # causes the getCommand() method to timeout (seen in the
            # test results, but is harmless).
            with socket.create_connection(gateway_address) as s:
                break
        except ConnectionRefusedError:
            pass

    with socket.create_connection(gateway_address) as s:
        msg = "%s\r\n" % build_uuid
        s.sendall(msg.encode('utf-8'))
        event.set()  # notify we are connected and req sent
        # Read until the server closes the connection (empty recv).
        while True:
            data = s.recv(1024)
            if not data:
                break
            self.streaming_data += data.decode('utf-8')
        s.shutdown(socket.SHUT_RDWR)
def test_websocket_streaming(self):
    """Stream a build's log to two websocket clients simultaneously and
    verify both receive the full on-disk log contents."""
    # Start the web server
    web = self.useFixture(
        ZuulWebFixture(self.gearman_server.port, self.config))

    # Start the finger streamer daemon
    streamer = zuul.lib.log_streamer.LogStreamer(
        self.host, 0, self.executor_server.jobdir_root)
    self.addCleanup(streamer.stop)

    # Need to set the streaming port before submitting the job
    finger_port = streamer.server.socket.getsockname()[1]
    self.executor_server.log_streaming_port = finger_port

    A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
    self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))

    # We don't have any real synchronization for the ansible jobs, so
    # just wait until we get our running build.
    for x in iterate_timeout(30, "build"):
        if len(self.builds):
            break
    build = self.builds[0]
    self.assertEqual(build.name, 'python27')

    build_dir = os.path.join(self.executor_server.jobdir_root, build.uuid)
    for x in iterate_timeout(30, "build dir"):
        if os.path.exists(build_dir):
            break

    # Need to wait to make sure that jobdir gets set
    for x in iterate_timeout(30, "jobdir"):
        if build.jobdir is not None:
            break
        # Refresh our reference each iteration in case the build
        # object was replaced.
        build = self.builds[0]

    # Wait for the job to begin running and create the ansible log file.
    # The job waits to complete until the flag file exists, so we can
    # safely access the log here.  We only open it (to force a file handle
    # to be kept open for it after the job finishes) but wait to read the
    # contents until the job is done.
    ansible_log = os.path.join(build.jobdir.log_root, 'job-output.txt')
    for x in iterate_timeout(30, "ansible log"):
        if os.path.exists(ansible_log):
            break
    logfile = open(ansible_log, 'r')
    self.addCleanup(logfile.close)

    # Start a thread with the websocket client
    client1 = self.runWSClient(web.port, build.uuid)
    client1.event.wait()
    client2 = self.runWSClient(web.port, build.uuid)
    client2.event.wait()

    # Allow the job to complete
    flag_file = os.path.join(build_dir, 'test_wait')
    open(flag_file, 'w').close()

    # Wait for the websocket client to complete, which it should when
    # it's received the full log.
    client1.thread.join()
    client2.thread.join()

    self.waitUntilSettled()

    # Both clients must have seen the exact file contents.
    file_contents = logfile.read()
    self.log.debug("\n\nFile contents: %s\n\n", file_contents)
    self.log.debug("\n\nStreamed: %s\n\n", client1.results)
    self.assertEqual(file_contents, client1.results)
    self.log.debug("\n\nStreamed: %s\n\n", client2.results)
    self.assertEqual(file_contents, client2.results)
def test_decode_boundaries(self):
    '''
    Test multi-byte characters crossing read buffer boundaries.

    The finger client used by ZuulWeb reads in increments of 1024 bytes.
    If the last byte is a multi-byte character, we end up with an error
    similar to:

        'utf-8' codec can't decode byte 0xe2 in position 1023: \
        unexpected end of data

    By making the 1024th character in the log file a multi-byte character
    (here, the Euro character), we can test this.
    '''
    # Start the web server
    web = self.useFixture(
        ZuulWebFixture(self.gearman_server.port, self.config))

    # Start the finger streamer daemon
    streamer = zuul.lib.log_streamer.LogStreamer(
        self.host, 0, self.executor_server.jobdir_root)
    self.addCleanup(streamer.stop)

    # Need to set the streaming port before submitting the job
    finger_port = streamer.server.socket.getsockname()[1]
    self.executor_server.log_streaming_port = finger_port

    A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
    self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))

    # We don't have any real synchronization for the ansible jobs, so
    # just wait until we get our running build.
    for x in iterate_timeout(30, "builds"):
        if len(self.builds):
            break
    build = self.builds[0]
    self.assertEqual(build.name, 'python27')

    build_dir = os.path.join(self.executor_server.jobdir_root, build.uuid)
    for x in iterate_timeout(30, "build dir"):
        if os.path.exists(build_dir):
            break

    # Need to wait to make sure that jobdir gets set
    for x in iterate_timeout(30, "jobdir"):
        if build.jobdir is not None:
            break
        # Refresh our reference each iteration in case the build
        # object was replaced.
        build = self.builds[0]

    # Wait for the job to begin running and create the ansible log file.
    # The job waits to complete until the flag file exists, so we can
    # safely access the log here.  We only open it (to force a file handle
    # to be kept open for it after the job finishes) but wait to read the
    # contents until the job is done.
    ansible_log = os.path.join(build.jobdir.log_root, 'job-output.txt')
    for x in iterate_timeout(30, "ansible log"):
        if os.path.exists(ansible_log):
            break

    # Replace log file contents with the 1024th character being a
    # multi-byte character.
    with io.open(ansible_log, 'w', encoding='utf8') as f:
        f.write("a" * 1023)
        f.write(u"\u20AC")

    logfile = open(ansible_log, 'r')
    self.addCleanup(logfile.close)

    # Start a thread with the websocket client
    client1 = self.runWSClient(web.port, build.uuid)
    client1.event.wait()

    # Allow the job to complete
    flag_file = os.path.join(build_dir, 'test_wait')
    open(flag_file, 'w').close()

    # Wait for the websocket client to complete, which it should when
    # it's received the full log.
    client1.thread.join()
    self.waitUntilSettled()

    # The streamed data must match the file contents exactly, i.e. the
    # Euro character must not have been split or mangled.
    file_contents = logfile.read()
    logfile.close()
    self.log.debug("\n\nFile contents: %s\n\n", file_contents)
    self.log.debug("\n\nStreamed: %s\n\n", client1.results)
    self.assertEqual(file_contents, client1.results)
def test_streaming(self):
    """Stream a build's log via startStreamer and verify the streamed
    data matches the on-disk log file."""
    A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
    self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))

    # We don't have any real synchronization for the ansible jobs, so
    # just wait until we get our running build.
    for x in iterate_timeout(30, "builds"):
        if len(self.builds):
            break
    build = self.builds[0]
    self.assertEqual(build.name, 'python27')

    build_dir = os.path.join(self.executor_server.jobdir_root, build.uuid)
    for x in iterate_timeout(30, "build dir"):
        if os.path.exists(build_dir):
            break

    # Need to wait to make sure that jobdir gets set
    for x in iterate_timeout(30, "jobdir"):
        if build.jobdir is not None:
            break
        # Refresh our reference each iteration in case the build
        # object was replaced.
        build = self.builds[0]

    # Wait for the job to begin running and create the ansible log file.
    # The job waits to complete until the flag file exists, so we can
    # safely access the log here.  We only open it (to force a file handle
    # to be kept open for it after the job finishes) but wait to read the
    # contents until the job is done.
    ansible_log = os.path.join(build.jobdir.log_root, 'job-output.txt')
    for x in iterate_timeout(30, "ansible log"):
        if os.path.exists(ansible_log):
            break
    logfile = open(ansible_log, 'r')
    self.addCleanup(logfile.close)

    # Create a thread to stream the log. We need this to be happening
    # before we create the flag file to tell the job to complete.
    streamer_thread = threading.Thread(
        target=self.startStreamer,
        args=(0, build.uuid, self.executor_server.jobdir_root,)
    )
    streamer_thread.start()
    self.addCleanup(self.stopStreamer)
    self.test_streaming_event.wait()

    # Allow the job to complete, which should close the streaming
    # connection (and terminate the thread) as well since the log file
    # gets closed/deleted.
    flag_file = os.path.join(build_dir, 'test_wait')
    open(flag_file, 'w').close()
    self.waitUntilSettled()
    streamer_thread.join()

    # Now that the job is finished, the log file has been closed by the
    # job and deleted.  However, we still have a file handle to it, so we
    # can make sure that we read the entire contents at this point.
    # Compact the returned lines into a single string for easy comparison.
    file_contents = logfile.read()
    logfile.close()

    self.log.debug("\n\nFile contents: %s\n\n", file_contents)
    self.log.debug("\n\nStreamed: %s\n\n", self.streaming_data)
    self.assertEqual(file_contents, self.streaming_data)

    # Check that we logged a multiline debug message
    pattern = (r'^\d\d\d\d-\d\d-\d\d \d\d:\d\d\:\d\d\.\d\d\d\d\d\d \| '
               r'Debug Test Token String$')
    r = re.compile(pattern, re.MULTILINE)
    match = r.search(self.streaming_data)
    self.assertNotEqual(match, None)
def test_finger_gateway(self):
    """Stream a build's log through the finger gateway and verify the
    client receives the full on-disk log contents."""
    # Start the finger streamer daemon
    streamer = zuul.lib.log_streamer.LogStreamer(
        self.host, 0, self.executor_server.jobdir_root)
    self.addCleanup(streamer.stop)
    finger_port = streamer.server.socket.getsockname()[1]

    # Need to set the streaming port before submitting the job
    self.executor_server.log_streaming_port = finger_port

    A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
    self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))

    # We don't have any real synchronization for the ansible jobs, so
    # just wait until we get our running build.
    for x in iterate_timeout(30, "build"):
        if len(self.builds):
            break
    build = self.builds[0]
    self.assertEqual(build.name, 'python27')

    build_dir = os.path.join(self.executor_server.jobdir_root, build.uuid)
    for x in iterate_timeout(30, "build dir"):
        if os.path.exists(build_dir):
            break

    # Need to wait to make sure that jobdir gets set
    for x in iterate_timeout(30, "jobdir"):
        if build.jobdir is not None:
            break
        # Refresh our reference each iteration in case the build
        # object was replaced.
        build = self.builds[0]

    # Wait for the job to begin running and create the ansible log file.
    # The job waits to complete until the flag file exists, so we can
    # safely access the log here.  We only open it (to force a file handle
    # to be kept open for it after the job finishes) but wait to read the
    # contents until the job is done.
    ansible_log = os.path.join(build.jobdir.log_root, 'job-output.txt')
    for x in iterate_timeout(30, "ansible log"):
        if os.path.exists(ansible_log):
            break
    logfile = open(ansible_log, 'r')
    self.addCleanup(logfile.close)

    # Start the finger gateway daemon
    gateway = zuul.lib.fingergw.FingerGateway(
        ('127.0.0.1', self.gearman_server.port, None, None, None),
        (self.host, 0),
        user=None,
        command_socket=None,
        pid_file=None)
    gateway.start()
    self.addCleanup(gateway.stop)

    gateway_port = gateway.server.socket.getsockname()[1]
    gateway_address = (self.host, gateway_port)

    # Start a thread with the finger client
    finger_client_event = threading.Event()
    self.finger_client_results = ''
    finger_client_thread = threading.Thread(
        target=self.runFingerClient,
        args=(build.uuid, gateway_address, finger_client_event))
    finger_client_thread.start()
    finger_client_event.wait()

    # Allow the job to complete
    flag_file = os.path.join(build_dir, 'test_wait')
    open(flag_file, 'w').close()

    # Wait for the finger client to complete, which it should when
    # it's received the full log.
    finger_client_thread.join()

    self.waitUntilSettled()

    file_contents = logfile.read()
    logfile.close()
    self.log.debug("\n\nFile contents: %s\n\n", file_contents)
    self.log.debug("\n\nStreamed: %s\n\n", self.streaming_data)
    self.assertEqual(file_contents, self.streaming_data)
def test_websocket_hangup(self):
    """Verify a websocket client that hangs up is de-registered from
    the stream manager."""
    # Start the web server
    web = self.useFixture(
        ZuulWebFixture(self.gearman_server.port, self.config))

    # Start the finger streamer daemon
    streamer = zuul.lib.log_streamer.LogStreamer(
        self.host, 0, self.executor_server.jobdir_root)
    self.addCleanup(streamer.stop)

    # Need to set the streaming port before submitting the job
    finger_port = streamer.server.socket.getsockname()[1]
    self.executor_server.log_streaming_port = finger_port

    A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
    self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))

    # We don't have any real synchronization for the ansible jobs, so
    # just wait until we get our running build.
    for x in iterate_timeout(30, "build"):
        if len(self.builds):
            break
    build = self.builds[0]
    self.assertEqual(build.name, 'python27')

    build_dir = os.path.join(self.executor_server.jobdir_root, build.uuid)
    for x in iterate_timeout(30, "build dir"):
        if os.path.exists(build_dir):
            break

    # Need to wait to make sure that jobdir gets set
    for x in iterate_timeout(30, "jobdir"):
        if build.jobdir is not None:
            break
        # Refresh our reference each iteration in case the build
        # object was replaced.
        build = self.builds[0]

    # Wait for the job to begin running and create the ansible log file.
    # The job waits to complete until the flag file exists, so we can
    # safely access the log here.
    ansible_log = os.path.join(build.jobdir.log_root, 'job-output.txt')
    for x in iterate_timeout(30, "ansible log"):
        if os.path.exists(ansible_log):
            break

    # Start a thread with the websocket client
    client1 = self.runWSClient(web.port, build.uuid)
    client1.event.wait()

    # Wait until we've streamed everything so far
    for x in iterate_timeout(30, "streamer is caught up"):
        with open(ansible_log, 'r') as logfile:
            if client1.results == logfile.read():
                break
        # This is intensive, give it some time
        time.sleep(1)
    self.assertNotEqual(len(web.web.stream_manager.streamers.keys()), 0)

    # Hangup the client side
    client1.close(1000, 'test close')
    client1.thread.join()

    # The client should be de-registered shortly
    for x in iterate_timeout(30, "client cleanup"):
        if len(web.web.stream_manager.streamers.keys()) == 0:
            break

    # Allow the job to complete
    flag_file = os.path.join(build_dir, 'test_wait')
    open(flag_file, 'w').close()

    self.waitUntilSettled()
def test_streaming(self):
    """Stream a build's log via startStreamer and verify the streamed
    data matches the on-disk log file."""
    A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
    self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))

    # We don't have any real synchronization for the ansible jobs, so
    # just wait until we get our running build.
    for x in iterate_timeout(30, "builds"):
        if len(self.builds):
            break
    build = self.builds[0]
    self.assertEqual(build.name, 'python27')

    build_dir = os.path.join(self.executor_server.jobdir_root, build.uuid)
    for x in iterate_timeout(30, "build dir"):
        if os.path.exists(build_dir):
            break

    # Need to wait to make sure that jobdir gets set
    for x in iterate_timeout(30, "jobdir"):
        if build.jobdir is not None:
            break
        # Refresh our reference each iteration in case the build
        # object was replaced.
        build = self.builds[0]

    # Wait for the job to begin running and create the ansible log file.
    # The job waits to complete until the flag file exists, so we can
    # safely access the log here.  We only open it (to force a file handle
    # to be kept open for it after the job finishes) but wait to read the
    # contents until the job is done.
    ansible_log = os.path.join(build.jobdir.log_root, 'job-output.txt')
    for x in iterate_timeout(30, "ansible log"):
        if os.path.exists(ansible_log):
            break
    logfile = open(ansible_log, 'r')
    self.addCleanup(logfile.close)

    # Create a thread to stream the log. We need this to be happening
    # before we create the flag file to tell the job to complete.
    streamer_thread = threading.Thread(
        target=self.startStreamer,
        args=(
            0,
            build.uuid,
            self.executor_server.jobdir_root,
        ))
    streamer_thread.start()
    self.addCleanup(self.stopStreamer)
    self.test_streaming_event.wait()

    # Allow the job to complete, which should close the streaming
    # connection (and terminate the thread) as well since the log file
    # gets closed/deleted.
    flag_file = os.path.join(build_dir, 'test_wait')
    open(flag_file, 'w').close()
    self.waitUntilSettled()
    streamer_thread.join()

    # Now that the job is finished, the log file has been closed by the
    # job and deleted.  However, we still have a file handle to it, so we
    # can make sure that we read the entire contents at this point.
    # Compact the returned lines into a single string for easy comparison.
    file_contents = logfile.read()
    logfile.close()

    self.log.debug("\n\nFile contents: %s\n\n", file_contents)
    self.log.debug("\n\nStreamed: %s\n\n", self.streaming_data)
    self.assertEqual(file_contents, self.streaming_data)

    # Check that we logged a multiline debug message
    pattern = (r'^\d\d\d\d-\d\d-\d\d \d\d:\d\d\:\d\d\.\d\d\d\d\d\d \| '
               r'Debug Test Token String$')
    r = re.compile(pattern, re.MULTILINE)
    match = r.search(self.streaming_data)
    self.assertNotEqual(match, None)
def test_slow_start(self, loadavg_mock):
    """Verify the executor governor's starting-build limits gate job
    acceptance as builds move through the starting phase."""
    # Pin load average to zero so only the starting-builds sensor
    # influences acceptance decisions.
    loadavg_mock.return_value = (0.0, 0.0, 0.0)

    # Renamed from (min, max) to avoid shadowing the builtins.
    def _set_starting_builds(min_builds, max_builds):
        for sensor in self.executor_server.sensors:
            if isinstance(sensor, StartingBuildsSensor):
                sensor.min_starting_builds = min_builds
                sensor.max_starting_builds = max_builds

    # Note: This test relies on the fact that manageLoad is only
    # run at specific points.  Several times in the test we check
    # that manageLoad has disabled or enabled job acceptance based
    # on the number of "starting" jobs.  Some of those jobs may
    # have actually moved past the "starting" phase and are
    # actually "running".  But because manageLoad hasn't run
    # again, it still uses the old values.  Keep this in mind when
    # double checking its calculations.
    #
    # Disable the periodic governor runs to make sure they don't
    # interefere (only possible if the test runs longer than 10
    # seconds).
    self.executor_server.governor_stop_event.set()
    self.executor_server.hold_jobs_in_build = True
    _set_starting_builds(1, 1)
    self.executor_server.manageLoad()
    self.assertTrue(self.executor_server.accepting_work)
    A = self.fake_gerrit.addFakeChange('common-config', 'master', 'A')
    self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))

    build1 = self.waitForExecutorBuild('test1')
    # With one job (test1) being started, we should no longer
    # be accepting new work
    self.assertFalse(self.executor_server.accepting_work)
    self.assertEqual(len(self.executor_server.job_workers), 1)
    # Allow enough starting builds for the test to complete.
    _set_starting_builds(1, 3)
    # We must wait for build1 to enter a waiting state otherwise
    # the subsequent release() is a noop and the build is never
    # released.  We don't use waitUntilSettled as that requires
    # the other two builds to start which can't happen while we
    # don't accept jobs.
    for x in iterate_timeout(30, "build1 is waiting"):
        if build1.waiting:
            break
    build1.release()
    for x in iterate_timeout(30, "Wait for build1 to complete"):
        if build1.uuid not in self.executor_server.job_workers:
            break
    self.executor_server.manageLoad()
    # This manageLoad call has determined that there are 0 workers
    # running, so our full complement of 3 starting builds is
    # available.  It will re-register for work and pick up the
    # next two jobs.

    self.waitForExecutorBuild('test2')
    self.waitForExecutorBuild('test3')
    # When each of these jobs started, they caused manageLoad to
    # be called, the second invocation calculated that there were
    # 2 workers running, so our starting build limit was reduced
    # to 1.  Usually it will calculate that there are 2 starting
    # builds, but theoretically it could count only 1 if the first
    # build manages to leave the starting phase before the second
    # build starts.  It should always count the second build as
    # starting.  As long as at least one build is still in the
    # starting phase, this will exceed the limit and unregister.
    self.assertFalse(self.executor_server.accepting_work)

    self.executor_server.hold_jobs_in_build = False
    self.executor_server.release()
    self.waitUntilSettled()
    self.executor_server.manageLoad()
    self.assertTrue(self.executor_server.accepting_work)
def test_finger_gateway(self):
    """Stream a build's log through the finger gateway and verify the
    client receives the full on-disk log contents."""
    # Start the finger streamer daemon
    streamer = zuul.lib.log_streamer.LogStreamer(
        self.host, 0, self.executor_server.jobdir_root)
    self.addCleanup(streamer.stop)
    finger_port = streamer.server.socket.getsockname()[1]

    # Need to set the streaming port before submitting the job
    self.executor_server.log_streaming_port = finger_port

    A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
    self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))

    # We don't have any real synchronization for the ansible jobs, so
    # just wait until we get our running build.
    for x in iterate_timeout(30, "build"):
        if len(self.builds):
            break
    build = self.builds[0]
    self.assertEqual(build.name, 'python27')

    build_dir = os.path.join(self.executor_server.jobdir_root, build.uuid)
    for x in iterate_timeout(30, "build dir"):
        if os.path.exists(build_dir):
            break

    # Need to wait to make sure that jobdir gets set
    for x in iterate_timeout(30, "jobdir"):
        if build.jobdir is not None:
            break

    # Wait for the job to begin running and create the ansible log file.
    # The job waits to complete until the flag file exists, so we can
    # safely access the log here.  We only open it (to force a file handle
    # to be kept open for it after the job finishes) but wait to read the
    # contents until the job is done.
    ansible_log = os.path.join(build.jobdir.log_root, 'job-output.txt')
    for x in iterate_timeout(30, "ansible log"):
        if os.path.exists(ansible_log):
            break
    logfile = open(ansible_log, 'r')
    self.addCleanup(logfile.close)

    # Start the finger gateway daemon
    gateway = zuul.lib.fingergw.FingerGateway(
        ('127.0.0.1', self.gearman_server.port, None, None, None),
        (self.host, 0),
        user=None,
        command_socket=None,
        pid_file=None
    )
    gateway.start()
    self.addCleanup(gateway.stop)

    gateway_port = gateway.server.socket.getsockname()[1]
    gateway_address = (self.host, gateway_port)

    # Start a thread with the finger client
    finger_client_event = threading.Event()
    self.finger_client_results = ''
    finger_client_thread = threading.Thread(
        target=self.runFingerClient,
        args=(build.uuid, gateway_address, finger_client_event)
    )
    finger_client_thread.start()
    finger_client_event.wait()

    # Allow the job to complete
    flag_file = os.path.join(build_dir, 'test_wait')
    open(flag_file, 'w').close()

    # Wait for the finger client to complete, which it should when
    # it's received the full log.
    finger_client_thread.join()

    self.waitUntilSettled()

    file_contents = logfile.read()
    logfile.close()
    self.log.debug("\n\nFile contents: %s\n\n", file_contents)
    self.log.debug("\n\nStreamed: %s\n\n", self.streaming_data)
    self.assertEqual(file_contents, self.streaming_data)