def test_start_and_block_until_up_doesnt_raise_exception_if_master_service_is_up(self):
    """
    start_and_block_until_up should return normally when the mocked process listing is non-empty and the
    patched ServiceRunner reports the service as up.
    """
    # Presumably maps each shell command to the raw output it should produce -- confirm against the helper.
    canned_output_by_command = {
        'nohup some_path master --port 43000 &': "\n",
        'ps ax | grep \'[s]ome_path\'': "\nsome_path\n",
    }
    self._mock_shell_exec_command(canned_output_by_command)
    service_runner_class = self.patch('app.deployment.remote_master_service.ServiceRunner')
    service_runner_class.return_value.is_up.return_value = True

    master = RemoteMasterService('some_host', 'some_username', 'some_path')

    # No explicit assertion: the test passes as long as this call does not raise.
    master.start_and_block_until_up(43000, 5)
def test_start_and_block_until_up_raises_exception_if_process_fails_to_start(self):
    """
    start_and_block_until_up should exit with SystemExit matching '1' when the mocked process listing
    yields nothing but a newline.
    """
    # Presumably maps each shell command to the raw output it should produce -- confirm against the helper.
    canned_output_by_command = {
        'nohup some_path master --port 43000 &': "\n",
        'ps ax | grep \'[s]ome_path\'': "\n",
    }
    self._mock_shell_exec_command(canned_output_by_command)

    master = RemoteMasterService('some_host', 'some_username', 'some_path')

    with self.assertRaisesRegex(SystemExit, '1'):
        master.start_and_block_until_up(43000, 5)
def test_start_and_block_until_up_raises_exception_if_process_starts_by_service_doesnt_respond(self):
    """
    start_and_block_until_up should exit with SystemExit matching '1' when the mocked process listing is
    non-empty but the patched ServiceRunner never reports the service as up.
    """
    # Presumably maps each shell command to the raw output it should produce -- confirm against the helper.
    canned_output_by_command = {
        'nohup some_path master --port 43000 &': "\n",
        'ps ax | grep \'[s]ome_path\'': "\nsome_path\n",
    }
    self._mock_shell_exec_command(canned_output_by_command)
    service_runner_class = self.patch('app.deployment.remote_master_service.ServiceRunner')
    service_runner_class.return_value.is_up.return_value = False

    master = RemoteMasterService('some_host', 'some_username', 'some_path')

    with self.assertRaisesRegex(SystemExit, '1'):
        master.start_and_block_until_up(43000, 5)
def _start_services(self, master, master_port, slaves, slave_port, num_executors, username, clusterrunner_executable):
    """
    Stop and start the appropriate clusterrunner services on all machines.

    The master service is stopped first, then every reachable slave service is stopped. After that the
    master is started (blocking until it is up) and finally each slave service is started against it.

    :param master: hostname of the master
    :type master: str
    :param master_port: master's port
    :type master_port: int
    :param slaves: slave hostnames
    :type slaves: list[str]
    :param slave_port: slave's port
    :type slave_port: int
    :param num_executors: number of concurrent executors
    :type num_executors: int
    :param username: current username
    :type username: str
    :param clusterrunner_executable: where the clusterrunner executable on the remote hosts is expected to be
    :type clusterrunner_executable: str
    :raises SystemExit: if the master host cannot be resolved
    """
    self._logger.debug('Adding {} as a master service'.format(master))
    try:
        master_service = RemoteMasterService(master, username, clusterrunner_executable)
        master_service.stop()
    except socket.gaierror:
        # Nothing useful can be deployed without a master, so abort the whole operation.
        self._logger.error('Master host {} is unreachable, unable to instantiate service.'.format(master))
        raise SystemExit(1)

    slave_services = []
    for slave in slaves:
        self._logger.debug('Adding {} as a slave service'.format(slave))
        try:
            slave_service = RemoteSlaveService(slave, username, clusterrunner_executable)
            slave_service.stop()
            slave_services.append(slave_service)
        except socket.gaierror:
            # An unreachable slave is skipped (best effort); the remaining slaves are still deployed.
            self._logger.error('Slave host {} is unreachable, unable to instantiate service.'.format(slave))

    self._logger.debug('Starting master service on {}:{}'.format(master_service.host, master_port))
    master_service.start_and_block_until_up(master_port)

    self._logger.debug('Starting slave services')
    for slave_service in slave_services:
        try:
            slave_service.start(master, master_port, slave_port, num_executors)
        except Exception as e:  # pylint: disable=broad-except
            # Catch Exception rather than using a bare except, so KeyboardInterrupt and SystemExit still
            # propagate. One failing slave must not prevent the others from starting, and the reason is logged.
            self._logger.error('Failed to start slave service on {} with message: {}'.format(slave_service.host, e))
def _start_services(self, master, master_port, slaves, slave_port, num_executors, username, clusterrunner_executable):
    """
    Stop and start the appropriate clusterrunner services on all machines.

    All slave services are stopped (via a worker pool), then the master is stopped and restarted (blocking
    until it is up), and finally each slave service is started against the new master.

    :param master: hostname of the master
    :type master: str
    :param master_port: master's port
    :type master_port: int
    :param slaves: slave hostnames
    :type slaves: list[str]
    :param slave_port: slave's port
    :type slave_port: int
    :param num_executors: number of concurrent executors
    :type num_executors: int
    :param username: current username
    :type username: str
    :param clusterrunner_executable: where the clusterrunner executable on the remote hosts is expected to be
    :type clusterrunner_executable: str
    """
    # We want to stop slave services before the master service, as that is a more graceful shutdown and also
    # reduces the risk of a race condition where the slave service sends a slave-shutdown request to the master
    # after the new master service starts.
    self._logger.debug('Stopping all slave services')
    slave_services = [RemoteSlaveService(slave, username, clusterrunner_executable) for slave in slaves]

    # Explicitly close and join the pool so its workers are released as soon as the stop calls finish,
    # instead of lingering until the pool object happens to be garbage collected.
    pool = Pool()
    try:
        pool.map(lambda slave_service: slave_service.stop(), slave_services)
    finally:
        pool.close()
        pool.join()

    self._logger.debug('Stopping master service on {}...'.format(master))
    master_service = RemoteMasterService(master, username, clusterrunner_executable)
    master_service.stop()

    self._logger.debug('Starting master service on {}:{}'.format(master_service.host, master_port))
    master_service.start_and_block_until_up(master_port)

    self._logger.debug('Starting slave services')
    for slave_service in slave_services:
        try:
            slave_service.start(master, master_port, slave_port, num_executors)
        except Exception as e:  # pylint: disable=broad-except
            # Best effort: one slave failing to start must not prevent the remaining slaves from starting.
            self._logger.error('Failed to start slave service on {} with message: {}'.format(slave_service.host, e))
def _start_services(self, master, master_port, slaves, slave_port, num_executors, username, clusterrunner_executable):
    """
    Stop and start the appropriate clusterrunner services on all machines.

    All slave services are stopped (via a worker pool), then the master is stopped and restarted (blocking
    until it is up), and finally each slave service is started against the new master.

    :param master: hostname of the master
    :type master: str
    :param master_port: master's port
    :type master_port: int
    :param slaves: slave hostnames
    :type slaves: list[str]
    :param slave_port: slave's port
    :type slave_port: int
    :param num_executors: number of concurrent executors
    :type num_executors: int
    :param username: current username
    :type username: str
    :param clusterrunner_executable: where the clusterrunner executable on the remote hosts is expected to be
    :type clusterrunner_executable: str
    """
    # We want to stop slave services before the master service, as that is a more graceful shutdown and also
    # reduces the risk of a race condition where the slave service sends a slave-shutdown request to the master
    # after the new master service starts.
    self._logger.debug('Stopping all slave services')
    slave_services = [
        RemoteSlaveService(slave, username, clusterrunner_executable)
        for slave in slaves
    ]

    # Explicitly close and join the pool so its workers are released as soon as the stop calls finish,
    # instead of lingering until the pool object happens to be garbage collected.
    pool = Pool()
    try:
        pool.map(lambda slave_service: slave_service.stop(), slave_services)
    finally:
        pool.close()
        pool.join()

    self._logger.debug('Stopping master service on {}...'.format(master))
    master_service = RemoteMasterService(master, username, clusterrunner_executable)
    master_service.stop()

    self._logger.debug('Starting master service on {}:{}'.format(
        master_service.host, master_port))
    master_service.start_and_block_until_up(master_port)

    self._logger.debug('Starting slave services')
    for slave_service in slave_services:
        try:
            slave_service.start(master, master_port, slave_port, num_executors)
        except Exception as e:  # pylint: disable=broad-except
            # Best effort: one slave failing to start must not prevent the remaining slaves from starting.
            self._logger.error(
                'Failed to start slave service on {} with message: {}'.
                format(slave_service.host, e))
def test_is_process_running_returns_true_if_found_non_empty_output(self):
    """
    _is_process_running should be truthy when the mocked process-listing command yields non-empty output.
    """
    # Presumably maps each shell command to the raw output it should produce -- confirm against the helper.
    canned_output_by_command = {'ps ax | grep \'[s]ome_command\'': "\nrealoutput\n"}
    self._mock_shell_exec_command(canned_output_by_command)

    master = RemoteMasterService('some_host', 'some_username', 'some_path')
    process_found = master._is_process_running('some_command')

    self.assertTrue(process_found)
def test_start_and_block_until_up_raises_exception_if_master_service_not_up(self):
    """
    start_and_block_until_up should exit with SystemExit matching '1' when the patched ServiceRunner
    reports that the service is not up.
    """
    # Patch the shell client factory so constructing the service uses a mock instead of the real factory.
    self.patch('app.deployment.remote_service.ShellClientFactory')
    service_runner_class = self.patch('app.deployment.remote_master_service.ServiceRunner')
    service_runner_class.return_value.is_up.return_value = False

    master = RemoteMasterService('some_host', 'some_username', 'some_path')

    with self.assertRaisesRegex(SystemExit, '1'):
        master.start_and_block_until_up(43000, 5)
def test_start_and_block_until_up_doesnt_raise_exception_if_master_service_is_up(self):
    """
    start_and_block_until_up should return normally when the patched ServiceRunner reports the service
    as up.
    """
    # Patch the shell client factory so constructing the service uses a mock instead of the real factory.
    self.patch('app.deployment.remote_service.ShellClientFactory')
    service_runner_class = self.patch('app.deployment.remote_master_service.ServiceRunner')
    service_runner_class.return_value.is_up.return_value = True

    master = RemoteMasterService('some_host', 'some_username', 'some_path')

    # No explicit assertion: the test passes as long as this call does not raise.
    master.start_and_block_until_up(43000, 5)