def declare_instance_nodes(self, running_instances):
    """ Removes dead ZooKeeper instance entries and adds running ones.

    Only entries whose machine IP matches this machine's private IP are
    considered; entries belonging to other machines are left untouched.

    Args:
      running_instances: An iterable of Instances.
    """
    # The set arithmetic below needs a real set. Without this, a list would
    # raise a TypeError and a generator would be consumed by the first pass.
    running_instances = set(running_instances)

    registered_instances = set()
    for version_key in self._zk_client.get_children(VERSION_REGISTRATION_NODE):
        version_node = '/'.join([VERSION_REGISTRATION_NODE, version_key])
        for instance_entry in self._zk_client.get_children(version_node):
            # The first ':'-separated field is the machine IP and the last
            # one is the port.
            entry_parts = instance_entry.split(':')
            if entry_parts[0] != self._private_ip:
                continue

            port = int(entry_parts[-1])
            instance_node = '/'.join([version_node, instance_entry])
            # The first element of the get() result is used as the revision.
            revision = self._zk_client.get(instance_node)[0]
            revision_key = VERSION_PATH_SEPARATOR.join([version_key, revision])
            registered_instances.add(Instance(revision_key, port))

    # Remove outdated nodes.
    for instance in registered_instances - running_instances:
        self.unregister_instance(instance)

    # Add nodes for running instances.
    for instance in running_instances - registered_instances:
        self.register_instance(instance)
    def _restart_unrouted_instances(self):
        """ Restarts instances that the router reports as failed.

        While holding the work lock, each failed (version key, port) pair is
        matched against the instances this manager is running. Pairs with no
        matching instance only have their routing entry removed. Pairs whose
        version no longer exists are skipped. Everything else is stopped and
        started again on the same port.
        """
        with (yield self._work_lock.acquire()):
            failed_instances = yield self._routing_client.get_failed_instances()
            for version_key, port in failed_instances:
                matches = (candidate for candidate in self._running_instances
                           if candidate.version_key == version_key
                           and candidate.port == port)
                instance = next(matches, None)
                if instance is None:
                    # The manager has no record of that instance, so all
                    # that is left to do is remove its routing.
                    self._routing_client.unregister_instance(
                        Instance(version_key, port))
                    continue

                try:
                    version = self._projects_manager.version_from_key(
                        instance.version_key)
                except KeyError:
                    # The version is gone, so do no work here. The scheduler
                    # should remove any assignments for it.
                    continue

                logger.warning(
                    'Restarting failed instance: {}'.format(instance))
                yield self._stop_app_instance(instance)
                yield self._start_instance(version, instance.port)
    def _recover_state(self):
        """ Establishes current state from Monit entries.

        Entries whose name starts with MONIT_INSTANCE_PREFIX are treated as
        application instances. Unmonitored ones have their configuration
        removed (followed by a Monit reload). The remaining names are parsed
        as "<prefix><revision>-<port>", cleaned up, and declared to the
        routing client as the set of running instances.
        """
        logger.info('Getting current state')
        all_entries = self._monit_operator.get_entries_sync()
        instance_entries = {}
        for name, status in all_entries.items():
            if name.startswith(MONIT_INSTANCE_PREFIX):
                instance_entries[name] = status

        # Remove all unmonitored entries.
        unmonitored = [name for name, status in instance_entries.items()
                       if status == MonitStates.UNMONITORED]
        for name in unmonitored:
            self._monit_operator.remove_configuration(name)

        for name in unmonitored:
            del instance_entries[name]

        # Monit only needs to reload if a configuration was removed.
        if unmonitored:
            self._monit_operator.reload_sync()

        prefix_length = len(MONIT_INSTANCE_PREFIX)
        instance_details = []
        for name, status in instance_entries.items():
            # The port is whatever follows the last '-' in the name.
            revision, port = name[prefix_length:].rsplit('-', 1)
            instance_details.append(
                {'revision': revision, 'port': int(port), 'state': status})

        clean_up_instances(instance_details)

        # Ensure version nodes exist. The version key is the first three
        # '_'-separated fields of the revision.
        running_versions = {
            '_'.join(details['revision'].split('_')[:3])
            for details in instance_details
        }
        self._zk_client.ensure_path(VERSION_REGISTRATION_NODE)
        for version_key in running_versions:
            version_node = '/'.join([VERSION_REGISTRATION_NODE, version_key])
            self._zk_client.ensure_path(version_node)

        # Account for monitored instances.
        running_instances = {
            Instance(details['revision'], details['port'])
            for details in instance_details
        }
        self._routing_client.declare_instance_nodes(running_instances)
        self._running_instances = running_instances
# Example #4
# 0
    def _recover_state(self):
        """ Establishes current state from services.

        Services whose name starts with SERVICE_INSTANCE_PREFIX are treated as
        application instances. The text after the first '@' in each name is
        parsed as "<revision>-<port>". The resulting instances are declared to
        the routing client and stored as the set of running instances.
        """
        logger.info('Getting current state')
        service_entries = self._service_operator.list()
        instance_entries = {
            entry: state
            for entry, state in service_entries.items()
            if entry.startswith(SERVICE_INSTANCE_PREFIX)
        }

        instance_details = []
        for entry, state in instance_entries.items():
            # Split on the LAST hyphen only. A revision may itself contain
            # hyphens, and a larger maxsplit would then produce more than
            # two parts and break the unpacking.
            revision, port = entry[entry.find('@') + 1:].rsplit('-', 1)
            instance_details.append({
                'revision': revision,
                'port': int(port),
                'state': state
            })

        # Ensure version nodes exist. The version key is the first three
        # '_'-separated fields of the revision.
        running_versions = {
            '_'.join(instance['revision'].split('_')[:3])
            for instance in instance_details
        }
        self._zk_client.ensure_path(VERSION_REGISTRATION_NODE)
        for version_key in running_versions:
            self._zk_client.ensure_path('/'.join(
                [VERSION_REGISTRATION_NODE, version_key]))

        # Account for running instances.
        running_instances = {
            Instance(instance['revision'], instance['port'])
            for instance in instance_details
        }
        self._routing_client.declare_instance_nodes(running_instances)
        self._running_instances = running_instances
    def _start_instance(self, version, port):
        """ Starts an application instance on this machine under Monit.

        Ensures the project's API server and the revision's source are
        available, writes a Monit configuration for the instance, reloads
        Monit and asks it to start the process, then adds routing for the
        instance and sets up log rotation for the project.

        Args:
          version: A Version object.
          port: An integer specifying a port to use.
        Raises:
          BadConfigurationException: If the runtime is unknown, or a Java
            application is given 250MB of memory or less.
        """
        version_details = version.version_details
        runtime = version_details['runtime']
        # Work on a copy: the runtime-specific variables added below must not
        # leak back into the version's stored 'envVariables' mapping. A
        # missing or empty value yields an empty dict.
        env_vars = dict(version_details.get('envVariables') or {})
        runtime_params = self._deployment_config.get_config(
            'runtime_parameters')
        max_memory = runtime_params.get('default_max_appserver_memory',
                                        DEFAULT_MAX_APPSERVER_MEMORY)
        # A recognized instance class overrides the default memory limit.
        if 'instanceClass' in version_details:
            max_memory = INSTANCE_CLASSES.get(version_details['instanceClass'],
                                              max_memory)

        source_archive = version_details['deployment']['zip']['sourceUrl']

        api_server_port = yield self._ensure_api_server(version.project_id)
        yield self._source_manager.ensure_source(version.revision_key,
                                                 source_archive, runtime)

        logger.info('Starting {}:{}'.format(version, port))

        pidfile = PIDFILE_TEMPLATE.format(revision=version.revision_key,
                                          port=port)

        if runtime == GO:
            env_vars['GOPATH'] = os.path.join(UNPACK_ROOT,
                                              version.revision_key, 'gopath')
            env_vars['GOROOT'] = os.path.join(GO_SDK, 'goroot')

        watch = ''.join([MONIT_INSTANCE_PREFIX, version.revision_key])
        if runtime in (PYTHON27, GO, PHP):
            start_cmd = create_python27_start_cmd(version.project_id,
                                                  self._login_server, port,
                                                  pidfile,
                                                  version.revision_key,
                                                  api_server_port)
            env_vars.update(
                create_python_app_env(self._login_server, version.project_id))
        elif runtime == JAVA:
            # Account for MaxPermSize (~170MB), the parent process (~50MB), and thread
            # stacks (~20MB).
            max_heap = max_memory - 250
            if max_heap <= 0:
                raise BadConfigurationException(
                    'Memory for Java applications must be greater than 250MB')

            start_cmd = create_java_start_cmd(version.project_id, port,
                                              self._login_server, max_heap,
                                              pidfile, version.revision_key,
                                              api_server_port)

            env_vars.update(create_java_app_env(self._deployment_config))
        else:
            raise BadConfigurationException('Unknown runtime {} for {}'.format(
                runtime, version.project_id))

        logger.info("Start command: " + str(start_cmd))
        logger.info("Environment variables: " + str(env_vars))

        monit_app_configuration.create_config_file(watch,
                                                   start_cmd,
                                                   pidfile,
                                                   port,
                                                   env_vars,
                                                   max_memory,
                                                   self._syslog_server,
                                                   check_port=True,
                                                   kill_exceeded_memory=True)

        # The Monit entry name is the watch name followed by '-<port>'.
        full_watch = '{}-{}'.format(watch, port)

        yield self._monit_operator.reload(self._thread_pool)

        # The reload command does not block, and we don't have a good way to check
        # if Monit is ready with its new configuration yet. If the daemon begins
        # reloading while it is handling the 'start', it can end up in a state
        # where it never starts the process. As a temporary workaround, this
        # small period allows it to finish reloading. This can be removed if
        # instances are started inside a cgroup.
        yield gen.sleep(0.5)
        yield self._monit_operator.send_command_retry_process(
            full_watch, 'start')

        # Make sure the version registration node exists.
        self._zk_client.ensure_path('/'.join(
            [VERSION_REGISTRATION_NODE, version.version_key]))

        instance = Instance(version.revision_key, port)
        yield self._add_routing(instance)

        if version.project_id == DASHBOARD_PROJECT_ID:
            log_size = DASHBOARD_LOG_SIZE
        else:
            log_size = APP_LOG_SIZE

        # A log rotation failure is logged, but does not fail the start.
        if not setup_logrotate(version.project_id, log_size):
            logger.error(
                "Error while setting up log rotation for application: {}".
                format(version.project_id))
# Example #6
# 0
    def _start_instance(self, version, port):
        """ Starts an application instance on this machine as a service.

        Ensures the project's API server and the revision's source are
        available, writes the instance's command file, starts the
        corresponding service with a memory limit, then adds routing for the
        instance and sets up log rotation for the project.

        Args:
          version: A Version object.
          port: An integer specifying a port to use.
        Raises:
          BadConfigurationException: If the runtime is unknown, or a Java
            application is given 250MB of memory or less.
        """
        version_details = version.version_details
        runtime = version_details['runtime']
        # Work on a copy: the runtime-specific variables added below must not
        # leak back into the version's stored 'envVariables' mapping. A
        # missing or empty value yields an empty dict.
        env_vars = dict(version_details.get('envVariables') or {})
        runtime_params = self._deployment_config.get_config(
            'runtime_parameters')
        max_memory = runtime_params.get('default_max_appserver_memory',
                                        DEFAULT_MAX_APPSERVER_MEMORY)
        # A recognized instance class overrides the default memory limit.
        if 'instanceClass' in version_details:
            max_memory = INSTANCE_CLASSES.get(version_details['instanceClass'],
                                              max_memory)

        source_archive = version_details['deployment']['zip']['sourceUrl']
        http_port = version_details['appscaleExtensions']['httpPort']

        api_server_port, api_services = yield self._ensure_api_server(
            version.project_id, runtime)
        yield self._source_manager.ensure_source(version.revision_key,
                                                 source_archive, runtime)

        logger.info('Starting {}:{}'.format(version, port))

        pidfile = PIDFILE_TEMPLATE.format(revision=version.revision_key,
                                          port=port)

        if runtime == GO:
            env_vars['GOPATH'] = os.path.join(UNPACK_ROOT,
                                              version.revision_key, 'gopath')
            env_vars['GOROOT'] = os.path.join(GO_SDK, 'goroot')

        if runtime in (PYTHON27, GO, PHP):
            start_cmd = create_python27_start_cmd(version.project_id,
                                                  self._login_server, port,
                                                  pidfile,
                                                  version.revision_key,
                                                  api_server_port)
            env_vars.update(
                create_python_app_env(self._login_server, version.project_id))
        elif runtime in (JAVA, JAVA8):
            # Account for MaxPermSize (~170MB), the parent process (~50MB), and thread
            # stacks (~20MB).
            max_heap = max_memory - 250
            if max_heap <= 0:
                raise BadConfigurationException(
                    'Memory for Java applications must be greater than 250MB')

            start_cmd = create_java_start_cmd(version.project_id, port,
                                              http_port, self._login_server,
                                              max_heap, pidfile,
                                              version.revision_key,
                                              api_server_port, runtime)

            env_vars.update(
                create_java_app_env(self._deployment_config, runtime,
                                    version.project_id))
        else:
            raise BadConfigurationException('Unknown runtime {} for {}'.format(
                runtime, version.project_id))

        logger.info("Start command: " + str(start_cmd))
        logger.info("Environment variables: " + str(env_vars))

        # NOTE(review): values are wrapped in double quotes without any
        # escaping, so a value containing '"' (or other shell-significant
        # characters, if this file is run by a shell) would change the
        # command. Confirm how the command file is consumed before altering
        # the quoting.
        env_content = ' '.join(
            ['{}="{}"'.format(k, str(v)) for k, v in env_vars.items()])
        command_content = 'exec env {} {}'.format(env_content, start_cmd)
        # The service instance name is "<revision key>-<port>".
        service_inst = '{}-{}'.format(version.revision_key, port)
        service_name = 'appscale-instance-run@{}'.format(service_inst)
        service_props = {'MemoryLimit': '{}M'.format(max_memory)}
        command_file_path = '/run/appscale/apps/command_{}'.format(
            service_inst)
        file_io.write(command_file_path, command_content)

        yield self._service_operator.start_async(service_name,
                                                 wants=api_services,
                                                 properties=service_props)

        # Make sure the version registration node exists.
        self._zk_client.ensure_path('/'.join(
            [VERSION_REGISTRATION_NODE, version.version_key]))

        instance = Instance(version.revision_key, port)
        yield self._add_routing(instance)

        if version.project_id == DASHBOARD_PROJECT_ID:
            log_size = DASHBOARD_LOG_SIZE
        else:
            log_size = APP_LOG_SIZE

        # A log rotation failure is logged, but does not fail the start.
        if not setup_logrotate(version.project_id, log_size):
            logger.error(
                "Error while setting up log rotation for application: {}".
                format(version.project_id))