def main_async():
    """ Tries to stop all Monit services until they are stopped.

    Repeatedly fetches the Monit entries, ignoring cron entries and the entry
    named after this host. On each pass, a 'stop' command is sent to the first
    group returned by order_services that has at least one service that is not
    pending. Unrecognized services are treated as the last group.

    This function is a generator that yields the objects returned by
    MonitOperator and gen.sleep, so it must be driven as a coroutine.
    """
    monit_operator = MonitOperator()
    hostname = socket.gethostname()

    # The wrapper only closes over monit_operator, so it is built once instead
    # of being re-created on every polling pass.
    @retry_coroutine(max_retries=5, retry_on_exception=DEFAULT_RETRIES)
    def stop_with_retries(process_name):
        logger.debug('Sending command to stop "{}"..'.format(process_name))
        yield monit_operator.send_command(process_name, 'stop')

    logger.info('Waiting for monit to stop services')
    logged_service_warning = False
    stopped_count = 0
    while True:
        entries = yield monit_operator.get_entries()
        services = {
            service: state for service, state in entries.items()
            if 'cron' not in service and service != hostname
        }
        running = {
            service: state for service, state in services.items()
            if state not in (MonitStates.STOPPED, MonitStates.UNMONITORED)
        }
        if not running:
            logger.info('Finished stopping services')
            break

        if len(services) - len(running) != stopped_count:
            stopped_count = len(services) - len(running)
            logger.info('Stopped {}/{} services'.format(
                stopped_count, len(services)))

        ordered_services, unrecognized_services = order_services(
            running.keys())
        if unrecognized_services and not logged_service_warning:
            logger.warning('Unrecognized running services: {}'.format(
                unrecognized_services))
            logged_service_warning = True

        # Unrecognized services are stopped after every known group.
        ordered_services.append(unrecognized_services)

        # Pick the first group that still has something to stop. A separate
        # name is used so the `running` dict above is not shadowed by a list.
        to_stop = []
        for parallel_group in ordered_services:
            to_stop = [process for process in parallel_group
                       if services[process] != MonitStates.PENDING]
            if to_stop:
                break

        if not to_stop:
            # If all running services are pending, just wait until they are
            # not. Sleeping here keeps the loop from polling Monit back to
            # back with no delay.
            yield gen.sleep(0.3)
            continue

        yield [stop_with_retries(process) for process in to_stop]

        # Wait longer when more services were asked to stop, up to 5 seconds.
        yield gen.sleep(min(0.3 * len(to_stop), 5))
def main():
    """ Tries to stop all Monit services until they are stopped.

    Polls Monit every 0.3 seconds. On each pass, the first running service (in
    sorted name order) that is not pending is stopped with `monit stop`.
    """
    operator = MonitOperator()
    local_host = socket.gethostname()
    finished_states = (MonitStates.STOPPED, MonitStates.UNMONITORED)

    print('Waiting for monit to stop services')
    last_reported = 0
    while True:
        # Skip cron entries and the entry named after this machine.
        services = {}
        for name, state in operator.get_entries_sync().items():
            if 'cron' in name or name == local_host:
                continue
            services[name] = state

        active = sorted(name for name in services
                        if services[name] not in finished_states)
        if not active:
            print('Finished stopping services')
            break

        stopped_now = len(services) - len(active)
        if stopped_now != last_reported:
            last_reported = stopped_now
            print('Stopped {}/{} services'.format(last_reported,
                                                  len(services)))

        # If all running services are pending, nothing is sent on this pass.
        for name in active:
            if services[name] != MonitStates.PENDING:
                subprocess.Popen(['monit', 'stop', name])
                break

        time.sleep(.3)
def main():
    """ Tries to stop all Monit services until they are stopped.

    Polls Monit every 0.3 seconds. On each pass, a 'stop' command is sent
    (with retries) to the first running service, in the order given by
    order_services followed by any unrecognized services, that is not
    pending.
    """
    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
    monit_operator = MonitOperator()
    hostname = socket.gethostname()

    # The retry wrapper does not depend on loop state, so build it once rather
    # than on every polling pass.
    monit_retry = retry(max_retries=5, retry_on_exception=DEFAULT_RETRIES)
    send_w_retries = monit_retry(monit_operator.send_command_sync)

    logging.info('Waiting for monit to stop services')
    logged_service_warning = False
    stopped_count = 0
    while True:
        entries = monit_operator.get_entries_sync()
        services = {
            service: state for service, state in entries.items()
            if 'cron' not in service and service != hostname
        }
        running = {
            service: state for service, state in services.items()
            if state not in (MonitStates.STOPPED, MonitStates.UNMONITORED)
        }
        if not running:
            logging.info('Finished stopping services')
            break

        if len(services) - len(running) != stopped_count:
            stopped_count = len(services) - len(running)
            logging.info('Stopped {}/{} services'.format(
                stopped_count, len(services)))

        ordered_services, unrecognized_services = order_services(
            running.keys())
        if unrecognized_services and not logged_service_warning:
            logging.warning('Unrecognized running services: {}'.format(
                unrecognized_services))
            logged_service_warning = True

        ordered_services = ordered_services + unrecognized_services

        # A default is used instead of catching StopIteration so that only
        # the "nothing to stop" case is handled, not exceptions raised by
        # the ordering or the Monit call.
        service = next((candidate for candidate in ordered_services
                        if services[candidate] != MonitStates.PENDING), None)
        if service is not None:
            send_w_retries(service, 'stop')
        # Otherwise all running services are pending; just wait until they
        # are not.

        time.sleep(.3)
def main():
    """ Starts the AppManager.

    Connects to ZooKeeper, builds the objects the InstanceManager needs,
    starts the InstanceManager and then runs the IOLoop.
    """
    file_io.set_logging_format()
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    zk_client = KazooClient(hosts=','.join(appscale_info.get_zk_node_ips()))
    zk_client.start()

    deployment_config = DeploymentConfig(zk_client)
    projects_manager = GlobalProjectsManager(zk_client)
    thread_pool = ThreadPoolExecutor(MAX_BACKGROUND_WORKERS)
    source_manager = SourceManager(zk_client, thread_pool)
    source_manager.configure_automatic_fetch(projects_manager)
    monit_operator = MonitOperator()

    # Each option is defined with the value its getter returns at startup.
    option_sources = (
        ('private_ip', appscale_info.get_private_ip),
        ('syslog_server', appscale_info.get_headnode_ip),
        ('db_proxy', appscale_info.get_db_proxy),
        ('tq_proxy', appscale_info.get_tq_proxy),
        ('secret', appscale_info.get_secret),
    )
    for option_name, read_value in option_sources:
        options.define(option_name, read_value())

    routing_client = RoutingClient(
        zk_client, options.private_ip, options.secret)
    instance_manager = InstanceManager(
        zk_client,
        monit_operator,
        routing_client,
        projects_manager,
        deployment_config,
        source_manager,
        options.syslog_server,
        thread_pool,
        options.private_ip)
    instance_manager.start()

    logger.info('Starting AppManager')

    # Run populate_api_servers to completion before starting the loop.
    io_loop = IOLoop.current()
    io_loop.run_sync(instance_manager.populate_api_servers)
    io_loop.start()
def main():
    """ Starts the AdminServer.

    Parses the command line (-p/--port and -v/--verbose), builds the clients
    shared by the request handlers, and serves the web application on the
    chosen port.
    """
    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-p', '--port', type=int, default=constants.DEFAULT_PORT,
        help='The port to listen on')
    arg_parser.add_argument(
        '-v', '--verbose', action='store_true',
        help='Output debug-level logging')
    cli_args = arg_parser.parse_args()

    if cli_args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    option_sources = (
        ('secret', appscale_info.get_secret),
        ('login_ip', appscale_info.get_login_ip),
        ('private_ip', appscale_info.get_private_ip),
    )
    for option_name, read_value in option_sources:
        options.define(option_name, read_value())

    acc = appscale_info.get_appcontroller_client()
    ua_client = UAClient(appscale_info.get_db_master_ip(), options.secret)

    zk_hosts = ','.join(appscale_info.get_zk_node_ips())
    zk_client = KazooClient(hosts=zk_hosts,
                            connection_retry=ZK_PERSISTENT_RECONNECTS)
    zk_client.start()

    version_update_lock = zk_client.Lock(constants.VERSION_UPDATE_LOCK_NODE)
    thread_pool = ThreadPoolExecutor(4)
    monit_operator = MonitOperator()

    # Keyword arguments handed to the version handlers.
    all_resources = {
        'acc': acc,
        'ua_client': ua_client,
        'zk_client': zk_client,
        'version_update_lock': version_update_lock,
        'thread_pool': thread_pool
    }

    # The push worker manager is only created on taskqueue nodes.
    if options.private_ip in appscale_info.get_taskqueue_nodes():
        logging.info('Starting push worker manager')
        GlobalPushWorkerManager(zk_client, monit_operator)

    routes = [
        ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)/versions',
         VersionsHandler, all_resources),
        ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)/versions/([a-z0-9-]+)',
         VersionHandler, all_resources),
        ('/v1/apps/([a-z0-9-]+)/operations/([a-z0-9-]+)', OperationsHandler),
        ('/api/queue/update', UpdateQueuesHandler, {'zk_client': zk_client}),
    ]
    app = web.Application(routes)

    logging.info('Starting AdminServer')
    app.listen(cli_args.port)
    IOLoop.current().start()
def start_service():
    """ Starts a service using the Monit HTTP API.

    The service name is read from the single positional command line
    argument. The 'start' command is sent through a retry wrapper.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('service', help='The service to start')
    service_name = arg_parser.parse_args().service

    operator = MonitOperator()
    with_retries = retry(max_retries=5, retry_on_exception=DEFAULT_RETRIES)
    start_with_retries = with_retries(operator.send_command_sync)
    start_with_retries(service_name, 'start')
def main():
    """ Tries to stop all Monit services until they are stopped.

    Polls Monit every 0.3 seconds. Cron entries and the entry named after
    this host are ignored. On each pass, the first running service that is
    not pending (known services in order_services order, then unrecognized
    ones) is sent a 'stop' command with retries.
    """
    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
    monit_operator = MonitOperator()
    hostname = socket.gethostname()

    # Built once up front: neither the retry policy nor the wrapped method
    # changes between polling passes.
    monit_retry = retry(max_retries=5, retry_on_exception=DEFAULT_RETRIES)
    send_w_retries = monit_retry(monit_operator.send_command_sync)

    logger.info('Waiting for monit to stop services')
    logged_service_warning = False
    stopped_count = 0
    while True:
        entries = monit_operator.get_entries_sync()
        services = {service: state for service, state in entries.items()
                    if 'cron' not in service and service != hostname}
        running = {service: state for service, state in services.items()
                   if state not in (MonitStates.STOPPED,
                                    MonitStates.UNMONITORED)}
        if not running:
            logger.info('Finished stopping services')
            break

        if len(services) - len(running) != stopped_count:
            stopped_count = len(services) - len(running)
            logger.info(
                'Stopped {}/{} services'.format(stopped_count, len(services)))

        ordered_services, unrecognized_services = order_services(
            running.keys())
        # The warning is only logged the first time unrecognized services
        # are seen.
        if unrecognized_services and not logged_service_warning:
            logger.warning(
                'Unrecognized running services: {}'.format(
                    unrecognized_services))
            logged_service_warning = True

        ordered_services = ordered_services + unrecognized_services

        # next() with a default replaces the previous try/except
        # StopIteration, which also covered the ordering and the Monit call.
        service = next((candidate for candidate in ordered_services
                        if services[candidate] != MonitStates.PENDING), None)
        if service is not None:
            send_w_retries(service, 'stop')
        # Otherwise all running services are pending; just wait until they
        # are not.

        time.sleep(.3)
def ensure_api_server(project_id):
    """ Make sure there is a running API server for a project.

    If the project already has an entry in api_servers, that port is returned.
    Otherwise a Monit configuration is written for a new API server, Monit is
    reloaded, the server is started and its port is recorded.

    Args:
      project_id: A string specifying the project ID.
    Returns:
      An integer specifying the API server port.
    """
    global api_servers

    try:
        known_port = api_servers[project_id]
    except KeyError:
        pass
    else:
        raise gen.Return(known_port)

    # Use the port just below the lowest one in use, never exceeding
    # MAX_API_SERVER_PORT.
    candidates = [MAX_API_SERVER_PORT]
    candidates.extend(port - 1 for port in api_servers.values())
    server_port = min(candidates)

    zk_locations = appscale_info.get_zk_node_ips()
    cmd_parts = [
        API_SERVER_LOCATION,
        '--port', str(server_port),
        '--project-id', project_id,
        '--zookeeper-locations', ' '.join(zk_locations),
    ]
    start_cmd = ' '.join(cmd_parts)

    watch = API_SERVER_PREFIX + project_id
    full_watch = watch + '-' + str(server_port)
    pidfile = os.path.join(VAR_DIR, '{}.pid'.format(full_watch))

    monit_app_configuration.create_config_file(
        watch,
        start_cmd,
        pidfile,
        server_port,
        max_memory=DEFAULT_MAX_APPSERVER_MEMORY,
        check_port=True)

    operator = MonitOperator()
    yield operator.reload(thread_pool)
    yield operator.send_command_retry_process(full_watch, 'start')

    # Only record the port once the start command has been issued.
    api_servers[project_id] = server_port
    raise gen.Return(server_port)
def main():
    """ Tries to stop all Monit services until they are stopped.

    Every 0.3 seconds the Monit entries are fetched again. Each pass launches
    `monit stop` for at most one service: the first running, non-pending one
    in sorted name order.
    """
    operator = MonitOperator()
    this_host = socket.gethostname()

    print('Waiting for monit to stop services')
    reported = 0
    while True:
        all_entries = operator.get_entries_sync()

        # Cron entries and the entry named after this host are not tracked.
        tracked = dict(
            (name, state) for name, state in all_entries.items()
            if not ('cron' in name or name == this_host))
        alive = [
            name for name, state in tracked.items()
            if state not in (MonitStates.STOPPED, MonitStates.UNMONITORED)]

        if not alive:
            print('Finished stopping services')
            break

        done = len(tracked) - len(alive)
        if done != reported:
            reported = done
            print('Stopped {}/{} services'.format(reported, len(tracked)))

        target = next(
            (name for name in sorted(alive)
             if tracked[name] != MonitStates.PENDING),
            None)
        # If all running services are pending, just wait until they are not.
        if target is not None:
            subprocess.Popen(['monit', 'stop', target])

        time.sleep(.3)
def stop_service():
    """ Stops a service using the Monit HTTP API.

    The service name is read from the single positional command line
    argument. If ProcessNotFound is raised, its message is logged and the
    process exits with status 1.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('service', help='The service to stop')
    service_name = arg_parser.parse_args().service

    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

    try:
        operator = MonitOperator()
        with_retries = retry(max_retries=5,
                             retry_on_exception=DEFAULT_RETRIES)
        stop_with_retries = with_retries(operator.send_command_sync)
        stop_with_retries(service_name, 'stop')
    except ProcessNotFound as error:
        logging.info(str(error))
        sys.exit(1)
def main():
    """ Starts the AdminServer.

    The command line has three subcommands:
      serve: Runs the AdminServer and the service manager.
      summary: Prints a table of services and their states, then exits.
      restart: Posts a restart command to the service manager's unix socket.
    """
    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

    arg_parser = argparse.ArgumentParser(
        prog='appscale-admin',
        description='Manages AppScale-related processes')
    commands = arg_parser.add_subparsers(dest='command')
    commands.required = True

    serve_cmd = commands.add_parser(
        'serve',
        description='Starts the server that manages AppScale processes')
    serve_cmd.add_argument(
        '-p', '--port', type=int, default=constants.DEFAULT_PORT,
        help='The port to listen on')
    serve_cmd.add_argument(
        '-v', '--verbose', action='store_true',
        help='Output debug-level logging')

    commands.add_parser(
        'summary',
        description='Lists AppScale processes running on this machine')

    restart_cmd = commands.add_parser(
        'restart',
        description='Restart AppScale processes running on this machine')
    restart_cmd.add_argument(
        'service', nargs='+', help='The process or service ID to restart')

    cli_args = arg_parser.parse_args()

    if cli_args.command == 'summary':
        rows = sorted(get_combined_services().items())
        print(tabulate(rows, headers=['Service', 'State']))
        sys.exit(0)

    if cli_args.command == 'restart':
        # The socket path is percent-encoded so it can be used as the host
        # part of the http+unix URL.
        quoted_socket = urlquote(ServiceManagerHandler.SOCKET_PATH, safe='')
        payload = {'command': 'restart', 'arg': [cli_args.service]}
        unix_session = requests_unixsocket.Session()
        reply = unix_session.post(
            'http+unix://{}/'.format(quoted_socket), data=payload)
        reply.raise_for_status()
        return

    # Everything below handles the remaining subcommand.
    if cli_args.verbose:
        logger.setLevel(logging.DEBUG)

    option_sources = (
        ('secret', appscale_info.get_secret),
        ('login_ip', appscale_info.get_login_ip),
        ('private_ip', appscale_info.get_private_ip),
        ('load_balancers', appscale_info.get_load_balancer_ips),
    )
    for option_name, read_value in option_sources:
        options.define(option_name, read_value())

    acc = appscale_info.get_appcontroller_client()
    ua_client = UAClient(appscale_info.get_db_master_ip(), options.secret)

    zk_hosts = ','.join(appscale_info.get_zk_node_ips())
    zk_client = KazooClient(
        hosts=zk_hosts, connection_retry=ZK_PERSISTENT_RECONNECTS)
    zk_client.start()

    version_update_lock = zk_client.Lock(constants.VERSION_UPDATE_LOCK_NODE)
    thread_pool = ThreadPoolExecutor(4)
    monit_operator = MonitOperator()

    all_resources = {
        'acc': acc,
        'ua_client': ua_client,
        'zk_client': zk_client,
        'version_update_lock': version_update_lock,
        'thread_pool': thread_pool
    }
    # Resource subsets passed to individual handlers.
    versions_resources = {
        'ua_client': ua_client,
        'zk_client': zk_client,
        'version_update_lock': version_update_lock,
        'thread_pool': thread_pool
    }
    zk_and_ua = {'zk_client': zk_client, 'ua_client': ua_client}

    # The push worker manager is only created on taskqueue nodes.
    if options.private_ip in appscale_info.get_taskqueue_nodes():
        logger.info('Starting push worker manager')
        GlobalPushWorkerManager(zk_client, monit_operator)

    service_manager = ServiceManager(zk_client)
    service_manager.start()

    routes = [
        ('/oauth/token', OAuthHandler, {'ua_client': ua_client}),
        ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)/versions',
         VersionsHandler, versions_resources),
        ('/v1/projects', ProjectsHandler, all_resources),
        ('/v1/projects/([a-z0-9-]+)', ProjectHandler, all_resources),
        ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)', ServiceHandler,
         all_resources),
        ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)/versions/([a-z0-9-]+)',
         VersionHandler, all_resources),
        ('/v1/apps/([a-z0-9-]+)/operations/([a-z0-9-]+)', OperationsHandler,
         {'ua_client': ua_client}),
        ('/api/cron/update', UpdateCronHandler,
         {'acc': acc, 'zk_client': zk_client, 'ua_client': ua_client}),
        ('/api/datastore/index/add', UpdateIndexesHandler, dict(zk_and_ua)),
        ('/api/queue/update', UpdateQueuesHandler, dict(zk_and_ua)),
    ]
    app = web.Application(routes)

    logger.info('Starting AdminServer')
    app.listen(cli_args.port)

    # The management application is served over a unix socket.
    management_app = web.Application([
        ('/', ServiceManagerHandler, {'service_manager': service_manager})])
    management_server = HTTPServer(management_app)
    management_server.add_socket(
        bind_unix_socket(ServiceManagerHandler.SOCKET_PATH))

    IOLoop.current().start()
# The amount of seconds to wait before retrying to add routing. ROUTING_RETRY_INTERVAL = 5 PIDFILE_TEMPLATE = os.path.join('/', 'var', 'run', 'appscale', 'app___{project}-{port}.pid') # The number of seconds an instance is allowed to finish serving requests after # it receives a shutdown signal. MAX_INSTANCE_RESPONSE_TIME = 600 # A DeploymentConfig accessor. deployment_config = None # An interface for working with Monit. monit_operator = MonitOperator() class BadConfigurationException(Exception): """ An application is configured incorrectly. """ def __init__(self, value): Exception.__init__(self, value) self.value = value def __str__(self): return repr(self.value) class NoRedirection(urllib2.HTTPErrorProcessor): """ A url opener that does not automatically redirect. """ def http_response(self, request, response):
def start_app(version_key, config):
    """ Starts a Google App Engine application on this machine. It
    will start it up and then proceed to fetch the main page.

    This function is a generator: it yields the results of
    ensure_api_server, source_manager.ensure_source and the MonitOperator
    calls, so it must be driven as a coroutine.

    Args:
      version_key: A string specifying a version key.
      config: a dictionary that contains
        app_port: An integer specifying the port to use.
        login_server: The server address the AppServer will use for login
          urls.
    Raises:
      BadConfigurationException: If a required config entry is missing, the
        project ID is invalid, the version cannot be found, the runtime is
        unknown, or a Java application has 250MB of memory or less.
    """
    if 'app_port' not in config:
        raise BadConfigurationException('app_port is required')
    if 'login_server' not in config or not config['login_server']:
        raise BadConfigurationException('login_server is required')

    login_server = config['login_server']

    project_id, service_id, version_id = version_key.split(
        VERSION_PATH_SEPARATOR)

    if not misc.is_app_name_valid(project_id):
        raise BadConfigurationException(
            'Invalid project ID: {}'.format(project_id))

    try:
        service_manager = projects_manager[project_id][service_id]
        version_details = service_manager[version_id].version_details
    except KeyError:
        raise BadConfigurationException('Version not found')

    runtime = version_details['runtime']

    # Work on a copy: the variables added below must not leak back into the
    # version details held by the projects manager.
    env_vars = dict(version_details.get('envVariables', {}))

    runtime_params = deployment_config.get_config('runtime_parameters')
    max_memory = runtime_params.get('default_max_appserver_memory',
                                    DEFAULT_MAX_APPSERVER_MEMORY)
    # An instance class found in INSTANCE_CLASSES overrides the default.
    if 'instanceClass' in version_details:
        max_memory = INSTANCE_CLASSES.get(version_details['instanceClass'],
                                          max_memory)

    revision_key = VERSION_PATH_SEPARATOR.join(
        [project_id, service_id, version_id,
         str(version_details['revision'])])
    source_archive = version_details['deployment']['zip']['sourceUrl']

    api_server_port = yield ensure_api_server(project_id)
    yield source_manager.ensure_source(revision_key, source_archive, runtime)

    logging.info('Starting {} application {}'.format(runtime, project_id))

    # NOTE(review): the PIDFILE_TEMPLATE visible in this file uses a
    # {project} placeholder, while only revision= and port= are supplied
    # here. str.format raises KeyError for a missing keyword, so confirm the
    # template and this call agree.
    pidfile = PIDFILE_TEMPLATE.format(revision=revision_key,
                                      port=config['app_port'])

    if runtime == constants.GO:
        env_vars['GOPATH'] = os.path.join(UNPACK_ROOT, revision_key, 'gopath')
        env_vars['GOROOT'] = os.path.join(GO_SDK, 'goroot')

    watch = ''.join([MONIT_INSTANCE_PREFIX, revision_key])
    if runtime in (constants.PYTHON27, constants.GO, constants.PHP):
        start_cmd = create_python27_start_cmd(
            project_id,
            login_server,
            config['app_port'],
            pidfile,
            revision_key,
            api_server_port)
        env_vars.update(create_python_app_env(login_server, project_id))
    elif runtime == constants.JAVA:
        # Account for MaxPermSize (~170MB), the parent process (~50MB), and
        # thread stacks (~20MB).
        max_heap = max_memory - 250
        if max_heap <= 0:
            raise BadConfigurationException(
                'Memory for Java applications must be greater than 250MB')

        start_cmd = create_java_start_cmd(
            project_id,
            config['app_port'],
            login_server,
            max_heap,
            pidfile,
            revision_key,
            api_server_port)
        env_vars.update(create_java_app_env(project_id))
    else:
        raise BadConfigurationException('Unknown runtime {} for {}'.format(
            runtime, project_id))

    logging.info("Start command: " + str(start_cmd))
    logging.info("Environment variables: " + str(env_vars))

    monit_app_configuration.create_config_file(
        watch,
        start_cmd,
        pidfile,
        config['app_port'],
        env_vars,
        max_memory,
        options.syslog_server,
        check_port=True,
        kill_exceeded_memory=True)

    full_watch = '{}-{}'.format(watch, config['app_port'])

    monit_operator = MonitOperator()
    yield monit_operator.reload(thread_pool)
    yield monit_operator.send_command_retry_process(full_watch, 'start')

    # Make sure the version node exists.
    zk_client.ensure_path('/'.join([VERSION_REGISTRATION_NODE, version_key]))

    # Since we are going to wait, possibly for a long time for the
    # application to be ready, we do it later.
    IOLoop.current().spawn_callback(add_routing,
                                    Instance(revision_key, config['app_port']))

    if project_id == DASHBOARD_PROJECT_ID:
        log_size = DASHBOARD_LOG_SIZE
    else:
        log_size = APP_LOG_SIZE

    if not setup_logrotate(project_id, log_size):
        logging.error(
            "Error while setting up log rotation for application: {}".format(
                project_id))