def declare_instance_nodes(self, running_instances):
    """ Removes dead ZooKeeper instance entries and adds running ones.

    Only entries whose IP portion matches this machine's private IP are
    considered; entries belonging to other machines are left untouched.

    Args:
      running_instances: An iterable of Instances.
    """
    # The set differences below require a real set on both sides, while the
    # documented contract only promises an iterable. Normalize it up front so
    # that lists and generators are accepted as well.
    running_instances = set(running_instances)

    registered_instances = set()
    for version_key in self._zk_client.get_children(VERSION_REGISTRATION_NODE):
        version_node = '/'.join([VERSION_REGISTRATION_NODE, version_key])
        for instance_entry in self._zk_client.get_children(version_node):
            # Entry names are colon-separated: the first field is the machine
            # IP and the last field is the port.
            machine_ip = instance_entry.split(':')[0]
            if machine_ip != self._private_ip:
                continue

            port = int(instance_entry.split(':')[-1])
            instance_node = '/'.join([version_node, instance_entry])
            # The first element of the node's value is used as the revision.
            revision = self._zk_client.get(instance_node)[0]
            revision_key = VERSION_PATH_SEPARATOR.join([version_key, revision])
            registered_instances.add(Instance(revision_key, port))

    # Remove outdated nodes.
    for instance in registered_instances - running_instances:
        self.unregister_instance(instance)

    # Add nodes for running instances.
    for instance in running_instances - registered_instances:
        self.register_instance(instance)
def _restart_unrouted_instances(self):
    """ Restarts instances that the router considers offline.

    Runs while holding the work lock. Each failed (version key, port) pair
    reported by the routing client is matched against the instances this
    manager is tracking.
    """
    with (yield self._work_lock.acquire()):
        failed = yield self._routing_client.get_failed_instances()
        for version_key, port in failed:
            # Look for the tracked instance matching the failed pair.
            for candidate in self._running_instances:
                if (candidate.version_key == version_key and
                        candidate.port == port):
                    instance = candidate
                    break
            else:
                # The manager has no record of that instance, so all that is
                # left to do is drop its routing entry.
                self._routing_client.unregister_instance(
                    Instance(version_key, port))
                continue

            try:
                version = self._projects_manager.version_from_key(
                    instance.version_key)
            except KeyError:
                # The version no longer exists, so avoid doing any work. The
                # scheduler should remove any assignments for it.
                continue

            logger.warning('Restarting failed instance: {}'.format(instance))
            yield self._stop_app_instance(instance)
            yield self._start_instance(version, instance.port)
def _recover_state(self):
    """ Establishes current state from Monit entries.

    Unmonitored instance entries have their configuration removed; the
    remaining ones are registered with ZooKeeper and the routing client and
    become the set of running instances.
    """
    logger.info('Getting current state')
    all_entries = self._monit_operator.get_entries_sync()
    instance_entries = {}
    for name, status in all_entries.items():
        if name.startswith(MONIT_INSTANCE_PREFIX):
            instance_entries[name] = status

    # Remove all unmonitored entries, reloading Monit only if something was
    # actually removed.
    unmonitored = [name for name, status in instance_entries.items()
                   if status == MonitStates.UNMONITORED]
    for name in unmonitored:
        self._monit_operator.remove_configuration(name)

    for name in unmonitored:
        del instance_entries[name]

    if unmonitored:
        self._monit_operator.reload_sync()

    # Whatever follows the prefix is split on its last hyphen into the
    # revision and the port.
    prefix_length = len(MONIT_INSTANCE_PREFIX)
    instance_details = []
    for name, status in instance_entries.items():
        revision, port = name[prefix_length:].rsplit('-', 1)
        instance_details.append(
            {'revision': revision, 'port': int(port), 'state': status})

    clean_up_instances(instance_details)

    # Ensure version nodes exist. The version key is the first three
    # underscore-separated fields of the revision.
    running_versions = set()
    for details in instance_details:
        running_versions.add('_'.join(details['revision'].split('_')[:3]))

    self._zk_client.ensure_path(VERSION_REGISTRATION_NODE)
    for version_key in running_versions:
        version_node = '/'.join([VERSION_REGISTRATION_NODE, version_key])
        self._zk_client.ensure_path(version_node)

    # Account for monitored instances.
    running_instances = set(
        Instance(details['revision'], details['port'])
        for details in instance_details)
    self._routing_client.declare_instance_nodes(running_instances)
    self._running_instances = running_instances
def _recover_state(self):
    """ Establishes current state from services.

    Every listed service whose name starts with the instance prefix is
    treated as an instance. The instances are registered with ZooKeeper and
    the routing client and become the set of running instances.
    """
    logger.info('Getting current state')
    service_entries = self._service_operator.list()
    instance_entries = {
        entry: state
        for entry, state in service_entries.items()
        if entry.startswith(SERVICE_INSTANCE_PREFIX)
    }

    instance_details = []
    for entry, state in instance_entries.items():
        # The text after '@' is "<revision>-<port>". Split on the last hyphen
        # only: everything before it belongs to the revision, which may itself
        # contain hyphens. Splitting more than once could produce three fields
        # and break the two-value unpacking.
        revision, port = entry[entry.find('@') + 1:].rsplit('-', 1)
        instance_details.append({
            'revision': revision,
            'port': int(port),
            'state': state
        })

    # Ensure version nodes exist. The version key is the first three
    # underscore-separated fields of the revision.
    running_versions = {
        '_'.join(instance['revision'].split('_')[:3])
        for instance in instance_details
    }
    self._zk_client.ensure_path(VERSION_REGISTRATION_NODE)
    for version_key in running_versions:
        self._zk_client.ensure_path('/'.join(
            [VERSION_REGISTRATION_NODE, version_key]))

    # Account for monitored instances.
    running_instances = {
        Instance(instance['revision'], instance['port'])
        for instance in instance_details
    }
    self._routing_client.declare_instance_nodes(running_instances)
    self._running_instances = running_instances
def _start_instance(self, version, port):
    """ Starts a Google App Engine application instance on this machine.

    Builds the runtime-specific start command, writes a Monit configuration
    for it, asks Monit to start the process, then adds routing for the new
    instance and sets up log rotation for the project.

    Args:
      version: A Version object.
      port: An integer specifying a port to use.
    Raises:
      BadConfigurationException: If the runtime is not recognized, or if a
        Java application is given 250MB of memory or less.
    """
    version_details = version.version_details
    runtime = version_details['runtime']
    # Work on a copy: the runtime-specific variables added below must not
    # leak back into the dictionary stored in the version's details.
    env_vars = dict(version_details.get('envVariables', {}))
    runtime_params = self._deployment_config.get_config('runtime_parameters')
    max_memory = runtime_params.get('default_max_appserver_memory',
                                    DEFAULT_MAX_APPSERVER_MEMORY)
    if 'instanceClass' in version_details:
        # An unrecognized instance class falls back to the default memory.
        max_memory = INSTANCE_CLASSES.get(version_details['instanceClass'],
                                          max_memory)

    source_archive = version_details['deployment']['zip']['sourceUrl']

    api_server_port = yield self._ensure_api_server(version.project_id)
    yield self._source_manager.ensure_source(version.revision_key,
                                             source_archive, runtime)

    logger.info('Starting {}:{}'.format(version, port))

    pidfile = PIDFILE_TEMPLATE.format(revision=version.revision_key,
                                      port=port)

    if runtime == GO:
        env_vars['GOPATH'] = os.path.join(UNPACK_ROOT, version.revision_key,
                                          'gopath')
        env_vars['GOROOT'] = os.path.join(GO_SDK, 'goroot')

    watch = ''.join([MONIT_INSTANCE_PREFIX, version.revision_key])
    if runtime in (PYTHON27, GO, PHP):
        start_cmd = create_python27_start_cmd(version.project_id,
                                              self._login_server,
                                              port,
                                              pidfile,
                                              version.revision_key,
                                              api_server_port)
        env_vars.update(
            create_python_app_env(self._login_server, version.project_id))
    elif runtime == JAVA:
        # Account for MaxPermSize (~170MB), the parent process (~50MB), and
        # thread stacks (~20MB).
        max_heap = max_memory - 250
        if max_heap <= 0:
            raise BadConfigurationException(
                'Memory for Java applications must be greater than 250MB')

        start_cmd = create_java_start_cmd(version.project_id,
                                          port,
                                          self._login_server,
                                          max_heap,
                                          pidfile,
                                          version.revision_key,
                                          api_server_port)
        env_vars.update(create_java_app_env(self._deployment_config))
    else:
        raise BadConfigurationException('Unknown runtime {} for {}'.format(
            runtime, version.project_id))

    logger.info("Start command: " + str(start_cmd))
    logger.info("Environment variables: " + str(env_vars))

    monit_app_configuration.create_config_file(watch,
                                               start_cmd,
                                               pidfile,
                                               port,
                                               env_vars,
                                               max_memory,
                                               self._syslog_server,
                                               check_port=True,
                                               kill_exceeded_memory=True)

    full_watch = '{}-{}'.format(watch, port)

    yield self._monit_operator.reload(self._thread_pool)

    # The reload command does not block, and we don't have a good way to check
    # if Monit is ready with its new configuration yet. If the daemon begins
    # reloading while it is handling the 'start', it can end up in a state
    # where it never starts the process. As a temporary workaround, this
    # small period allows it to finish reloading. This can be removed if
    # instances are started inside a cgroup.
    yield gen.sleep(0.5)
    yield self._monit_operator.send_command_retry_process(full_watch, 'start')

    # Make sure the version registration node exists.
    self._zk_client.ensure_path('/'.join(
        [VERSION_REGISTRATION_NODE, version.version_key]))

    instance = Instance(version.revision_key, port)
    yield self._add_routing(instance)

    if version.project_id == DASHBOARD_PROJECT_ID:
        log_size = DASHBOARD_LOG_SIZE
    else:
        log_size = APP_LOG_SIZE

    # A log rotation failure is reported but does not abort the start.
    if not setup_logrotate(version.project_id, log_size):
        logger.error(
            "Error while setting up log rotation for application: {}".
            format(version.project_id))
def _start_instance(self, version, port):
    """ Starts a Google App Engine application instance on this machine.

    Builds the runtime-specific start command, writes it to a command file,
    starts the matching service unit, then adds routing for the new instance
    and sets up log rotation for the project.

    Args:
      version: A Version object.
      port: An integer specifying a port to use.
    Raises:
      BadConfigurationException: If the runtime is not recognized, or if a
        Java application is given 250MB of memory or less.
    """
    version_details = version.version_details
    runtime = version_details['runtime']
    # Work on a copy: the runtime-specific variables added below must not
    # leak back into the dictionary stored in the version's details.
    env_vars = dict(version_details.get('envVariables', {}))
    runtime_params = self._deployment_config.get_config('runtime_parameters')
    max_memory = runtime_params.get('default_max_appserver_memory',
                                    DEFAULT_MAX_APPSERVER_MEMORY)
    if 'instanceClass' in version_details:
        # An unrecognized instance class falls back to the default memory.
        max_memory = INSTANCE_CLASSES.get(version_details['instanceClass'],
                                          max_memory)

    source_archive = version_details['deployment']['zip']['sourceUrl']
    http_port = version_details['appscaleExtensions']['httpPort']

    api_server_port, api_services = yield self._ensure_api_server(
        version.project_id, runtime)
    yield self._source_manager.ensure_source(version.revision_key,
                                             source_archive, runtime)

    logger.info('Starting {}:{}'.format(version, port))

    pidfile = PIDFILE_TEMPLATE.format(revision=version.revision_key,
                                      port=port)

    if runtime == GO:
        env_vars['GOPATH'] = os.path.join(UNPACK_ROOT, version.revision_key,
                                          'gopath')
        env_vars['GOROOT'] = os.path.join(GO_SDK, 'goroot')

    if runtime in (PYTHON27, GO, PHP):
        start_cmd = create_python27_start_cmd(version.project_id,
                                              self._login_server,
                                              port,
                                              pidfile,
                                              version.revision_key,
                                              api_server_port)
        env_vars.update(
            create_python_app_env(self._login_server, version.project_id))
    elif runtime in (JAVA, JAVA8):
        # Account for MaxPermSize (~170MB), the parent process (~50MB), and
        # thread stacks (~20MB).
        max_heap = max_memory - 250
        if max_heap <= 0:
            raise BadConfigurationException(
                'Memory for Java applications must be greater than 250MB')

        start_cmd = create_java_start_cmd(version.project_id,
                                          port,
                                          http_port,
                                          self._login_server,
                                          max_heap,
                                          pidfile,
                                          version.revision_key,
                                          api_server_port,
                                          runtime)
        env_vars.update(
            create_java_app_env(self._deployment_config, runtime,
                                version.project_id))
    else:
        raise BadConfigurationException('Unknown runtime {} for {}'.format(
            runtime, version.project_id))

    logger.info("Start command: " + str(start_cmd))
    logger.info("Environment variables: " + str(env_vars))

    # NOTE(review): values are wrapped in double quotes without any escaping,
    # so a value containing a double quote would break the command line.
    # Confirm that environment values are sanitized upstream.
    env_content = ' '.join(
        ['{}="{}"'.format(k, str(v)) for k, v in env_vars.items()])
    command_content = 'exec env {} {}'.format(env_content, start_cmd)

    # The service instance name is "<revision key>-<port>".
    service_inst = '{}-{}'.format(version.revision_key, port)
    service_name = 'appscale-instance-run@{}'.format(service_inst)
    service_props = {'MemoryLimit': '{}M'.format(max_memory)}
    command_file_path = '/run/appscale/apps/command_{}'.format(service_inst)
    # The command file is written before the service is started.
    file_io.write(command_file_path, command_content)

    yield self._service_operator.start_async(service_name,
                                             wants=api_services,
                                             properties=service_props)

    # Make sure the version registration node exists.
    self._zk_client.ensure_path('/'.join(
        [VERSION_REGISTRATION_NODE, version.version_key]))

    instance = Instance(version.revision_key, port)
    yield self._add_routing(instance)

    if version.project_id == DASHBOARD_PROJECT_ID:
        log_size = DASHBOARD_LOG_SIZE
    else:
        log_size = APP_LOG_SIZE

    # A log rotation failure is reported but does not abort the start.
    if not setup_logrotate(version.project_id, log_size):
        logger.error(
            "Error while setting up log rotation for application: {}".
            format(version.project_id))