Example #1
0
def main_async():
    """ Tries to stop all Monit services until they are stopped.

    Each pass fetches the Monit entries, ignores cron services and the entry
    named after this host, and sends a 'stop' command to every non-pending
    service in the first group (as returned by order_services) that has any.
    Unrecognized services form the final group. The loop ends once every
    remaining service is stopped or unmonitored.

    This function yields futures, so it is presumably driven as a Tornado
    coroutine (no decorator is visible here) -- TODO confirm.
    """
    monit_operator = MonitOperator()
    hostname = socket.gethostname()

    # Seconds to wait before polling again when every running service is
    # still pending and there is nothing to send.
    pending_poll_interval = 0.3

    logger.info('Waiting for monit to stop services')
    logged_service_warning = False
    stopped_count = 0
    while True:
        entries = yield monit_operator.get_entries()
        services = {
            service: state
            for service, state in entries.items()
            if 'cron' not in service and service != hostname
        }
        running = {
            service: state
            for service, state in services.items()
            if state not in (MonitStates.STOPPED, MonitStates.UNMONITORED)
        }
        if not running:
            logger.info('Finished stopping services')
            break

        # Only log progress when the number of stopped services changes.
        if len(services) - len(running) != stopped_count:
            stopped_count = len(services) - len(running)
            logger.info('Stopped {}/{} services'.format(
                stopped_count, len(services)))

        try:
            ordered_services, unrecognized_services = order_services(
                running.keys())
            if unrecognized_services and not logged_service_warning:
                logger.warning('Unrecognized running services: {}'.format(
                    unrecognized_services))
                logged_service_warning = True

            # Unrecognized services are handled last, as one parallel group.
            ordered_services.append(unrecognized_services)
            for parallel_group in ordered_services:
                # From here on, `running` is the list of services in the
                # selected group that can be sent a stop command.
                running = [
                    process for process in parallel_group
                    if services[process] != MonitStates.PENDING
                ]
                if running:
                    break
            else:
                # All running services are pending, so just wait until they
                # are not. Sleeping here keeps the loop from polling Monit
                # back-to-back with no delay.
                yield gen.sleep(pending_poll_interval)
                continue

            @retry_coroutine(max_retries=5, retry_on_exception=DEFAULT_RETRIES)
            def stop_with_retries(process_name):
                logger.debug(
                    'Sending command to stop "{}"..'.format(process_name))
                yield monit_operator.send_command(process_name, 'stop')

            # Stop every service in the selected group in parallel.
            yield [stop_with_retries(process) for process in running]
        except StopIteration:
            # No call visible in this block is known to raise StopIteration
            # (the for/else above covers the all-pending case); kept as a
            # guard -- TODO confirm and remove.
            pass

        # Wait longer when more stop commands were just sent, capped at 5s.
        yield gen.sleep(min(0.3 * len(running), 5))
Example #2
0
def main():
  """ Asks Monit to stop services, one per pass, until none are running.

  Cron services and the entry named after this host are ignored. Services
  reported as pending are skipped until their state changes.
  """
  operator = MonitOperator()
  this_host = socket.gethostname()

  print('Waiting for monit to stop services')
  last_reported = 0
  while True:
    services = {}
    for name, state in operator.get_entries_sync().items():
      if 'cron' in name or name == this_host:
        continue
      services[name] = state

    running = {}
    for name, state in services.items():
      if state in (MonitStates.STOPPED, MonitStates.UNMONITORED):
        continue
      running[name] = state

    if not running:
      print('Finished stopping services')
      break

    # Report progress only when the stopped total has changed.
    now_stopped = len(services) - len(running)
    if now_stopped != last_reported:
      last_reported = now_stopped
      print('Stopped {}/{} services'.format(last_reported, len(services)))

    # Stop the alphabetically first service that is not pending. If every
    # running service is pending, nothing is sent on this pass.
    for name in sorted(running.keys()):
      if services[name] != MonitStates.PENDING:
        subprocess.Popen(['monit', 'stop', name])
        break

    time.sleep(.3)
Example #3
0
def main():
    """ Stops Monit services, in order_services order, until none are running.

    Cron services and the entry named after this host are ignored. One stop
    command (with retries) is sent per pass to the first service that is not
    pending.
    """
    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
    operator = MonitOperator()
    this_host = socket.gethostname()

    logging.info('Waiting for monit to stop services')
    warned_unrecognized = False
    last_reported = 0
    while True:
        services = {}
        for name, state in operator.get_entries_sync().items():
            if 'cron' in name or name == this_host:
                continue
            services[name] = state

        running = {}
        for name, state in services.items():
            if state in (MonitStates.STOPPED, MonitStates.UNMONITORED):
                continue
            running[name] = state

        if not running:
            logging.info('Finished stopping services')
            break

        # Report progress only when the stopped total has changed.
        now_stopped = len(services) - len(running)
        if now_stopped != last_reported:
            last_reported = now_stopped
            logging.info('Stopped {}/{} services'.format(
                last_reported, len(services)))

        try:
            recognized, unrecognized = order_services(running.keys())
            if unrecognized and not warned_unrecognized:
                logging.warning('Unrecognized running services: {}'.format(
                    unrecognized))
                warned_unrecognized = True

            # Unrecognized services come after the recognized ones.
            stop_order = recognized + unrecognized
            target = next(name for name in stop_order
                          if services[name] != MonitStates.PENDING)

            stop_with_retries = retry(
                max_retries=5, retry_on_exception=DEFAULT_RETRIES)(
                    operator.send_command_sync)
            stop_with_retries(target, 'stop')
        except StopIteration:
            # Every running service is pending; wait for that to change.
            pass

        time.sleep(.3)
Example #4
0
def main():
    """ Wires up the InstanceManager and its dependencies, then runs the
    IO loop. """
    file_io.set_logging_format()
    logging.getLogger().setLevel(logging.INFO)

    zk_hosts = ','.join(appscale_info.get_zk_node_ips())
    zk_client = KazooClient(hosts=zk_hosts)
    zk_client.start()

    deployment_config = DeploymentConfig(zk_client)
    projects_manager = GlobalProjectsManager(zk_client)
    thread_pool = ThreadPoolExecutor(MAX_BACKGROUND_WORKERS)
    source_manager = SourceManager(zk_client, thread_pool)
    source_manager.configure_automatic_fetch(projects_manager)
    monit_operator = MonitOperator()

    # Each option's value is looked up immediately before it is defined.
    option_sources = (
        ('private_ip', appscale_info.get_private_ip),
        ('syslog_server', appscale_info.get_headnode_ip),
        ('db_proxy', appscale_info.get_db_proxy),
        ('tq_proxy', appscale_info.get_tq_proxy),
        ('secret', appscale_info.get_secret),
    )
    for option_name, lookup in option_sources:
        options.define(option_name, lookup())

    routing_client = RoutingClient(
        zk_client, options.private_ip, options.secret)
    instance_manager = InstanceManager(
        zk_client, monit_operator, routing_client, projects_manager,
        deployment_config, source_manager, options.syslog_server,
        thread_pool, options.private_ip)
    instance_manager.start()

    logger.info('Starting AppManager')

    # Populate the API servers once before entering the loop for good.
    loop = IOLoop.current()
    loop.run_sync(instance_manager.populate_api_servers)
    loop.start()
Example #5
0
def main():
    """ Starts the AdminServer.

    Parses the command line (port and verbosity), connects to ZooKeeper,
    registers the HTTP routes, and runs the IO loop.
    """
    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-p', '--port', type=int, default=constants.DEFAULT_PORT,
        help='The port to listen on')
    arg_parser.add_argument(
        '-v', '--verbose', action='store_true',
        help='Output debug-level logging')
    args = arg_parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    options.define('secret', appscale_info.get_secret())
    options.define('login_ip', appscale_info.get_login_ip())
    options.define('private_ip', appscale_info.get_private_ip())

    acc = appscale_info.get_appcontroller_client()
    ua_client = UAClient(appscale_info.get_db_master_ip(), options.secret)
    zk_hosts = ','.join(appscale_info.get_zk_node_ips())
    zk_client = KazooClient(hosts=zk_hosts,
                            connection_retry=ZK_PERSISTENT_RECONNECTS)
    zk_client.start()
    version_update_lock = zk_client.Lock(constants.VERSION_UPDATE_LOCK_NODE)
    thread_pool = ThreadPoolExecutor(4)
    monit_operator = MonitOperator()

    # Resources handed to the version handlers.
    all_resources = dict(
        acc=acc,
        ua_client=ua_client,
        zk_client=zk_client,
        version_update_lock=version_update_lock,
        thread_pool=thread_pool)

    # The push worker manager is only created on taskqueue nodes.
    if options.private_ip in appscale_info.get_taskqueue_nodes():
        logging.info('Starting push worker manager')
        GlobalPushWorkerManager(zk_client, monit_operator)

    routes = [
        ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)/versions',
         VersionsHandler, all_resources),
        ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)/versions/([a-z0-9-]+)',
         VersionHandler, all_resources),
        ('/v1/apps/([a-z0-9-]+)/operations/([a-z0-9-]+)', OperationsHandler),
        ('/api/queue/update', UpdateQueuesHandler, {'zk_client': zk_client}),
    ]
    app = web.Application(routes)
    logging.info('Starting AdminServer')
    app.listen(args.port)
    IOLoop.current().start()
Example #6
0
def start_service():
    """ Starts a service using the Monit HTTP API.

    The service name is read from the command line, and the 'start' command
    is sent with up to five retries.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('service', help='The service to start')
    service_name = arg_parser.parse_args().service

    operator = MonitOperator()
    start_with_retries = retry(
        max_retries=5, retry_on_exception=DEFAULT_RETRIES)(
            operator.send_command_sync)
    start_with_retries(service_name, 'start')
Example #7
0
def main():
  """ Keeps asking Monit to stop services until none are left running.

  Services are stopped in the order given by order_services, followed by any
  unrecognized ones. Cron services and the entry named after this host are
  ignored.
  """
  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
  operator = MonitOperator()
  local_hostname = socket.gethostname()

  logger.info('Waiting for monit to stop services')
  warned_unrecognized = False
  reported_stopped = 0
  while True:
    services = {}
    for name, state in operator.get_entries_sync().items():
      if 'cron' in name or name == local_hostname:
        continue
      services[name] = state

    running = {}
    for name, state in services.items():
      if state in (MonitStates.STOPPED, MonitStates.UNMONITORED):
        continue
      running[name] = state

    if not running:
      logger.info('Finished stopping services')
      break

    # Log progress only when the stopped total changes between passes.
    stopped_now = len(services) - len(running)
    if stopped_now != reported_stopped:
      reported_stopped = stopped_now
      logger.info(
        'Stopped {}/{} services'.format(reported_stopped, len(services)))

    try:
      known, unknown = order_services(running.keys())
      if unknown and not warned_unrecognized:
        logger.warning('Unrecognized running services: {}'.format(unknown))
        warned_unrecognized = True

      stop_order = known + unknown
      target = next(name for name in stop_order
                    if services[name] != MonitStates.PENDING)

      send_stop = retry(max_retries=5, retry_on_exception=DEFAULT_RETRIES)(
        operator.send_command_sync)
      send_stop(target, 'stop')
    except StopIteration:
      # Every running service is pending, so there is nothing to send yet.
      pass

    time.sleep(.3)
def ensure_api_server(project_id):
    """ Make sure there is a running API server for a project.

    If the project already has an entry in api_servers, its port is returned
    right away. Otherwise a Monit configuration is written for a new server,
    Monit is reloaded, the server is started, and its port is recorded.

    Args:
      project_id: A string specifying the project ID.
    Returns:
      An integer specifying the API server port.
    """
    global api_servers
    if project_id in api_servers:
        raise gen.Return(api_servers[project_id])

    # Pick a port just below the lowest one in use, starting from the
    # maximum when nothing at or below it is taken.
    server_port = MAX_API_SERVER_PORT
    for used_port in api_servers.values():
        if used_port > server_port:
            continue
        server_port = used_port - 1

    zk_locations = appscale_info.get_zk_node_ips()
    command_parts = [
        API_SERVER_LOCATION,
        '--port', str(server_port),
        '--project-id', project_id,
        '--zookeeper-locations', ' '.join(zk_locations),
    ]
    start_cmd = ' '.join(command_parts)

    watch = ''.join((API_SERVER_PREFIX, project_id))
    full_watch = '-'.join((watch, str(server_port)))
    pidfile_name = '{}.pid'.format(full_watch)
    pidfile = os.path.join(VAR_DIR, pidfile_name)
    monit_app_configuration.create_config_file(
        watch, start_cmd, pidfile, server_port,
        max_memory=DEFAULT_MAX_APPSERVER_MEMORY, check_port=True)

    operator = MonitOperator()
    yield operator.reload(thread_pool)
    yield operator.send_command_retry_process(full_watch, 'start')

    api_servers[project_id] = server_port
    raise gen.Return(server_port)
Example #9
0
def main():
    """ Tries to stop all Monit services until they are stopped.

    On each pass, a `monit stop` process is launched for the alphabetically
    first running service that is not pending. Cron services and the entry
    named after this host are ignored.
    """
    operator = MonitOperator()
    this_host = socket.gethostname()

    print('Waiting for monit to stop services')
    reported = 0
    while True:
        all_entries = operator.get_entries_sync()
        services = dict(
            (name, state) for name, state in all_entries.items()
            if 'cron' not in name and name != this_host)
        running = dict(
            (name, state) for name, state in services.items()
            if state not in (MonitStates.STOPPED, MonitStates.UNMONITORED))
        if not running:
            print('Finished stopping services')
            break

        # Print progress only when the stopped total has changed.
        stopped_now = len(services) - len(running)
        if stopped_now != reported:
            reported = stopped_now
            print('Stopped {}/{} services'.format(reported, len(services)))

        not_pending = (name for name in sorted(running.keys())
                       if services[name] != MonitStates.PENDING)
        target = next(not_pending, None)
        if target is not None:
            subprocess.Popen(['monit', 'stop', target])
        # Otherwise every running service is pending; just wait.

        time.sleep(.3)
Example #10
0
def stop_service():
    """ Stops a service using the Monit HTTP API.

    The service name is read from the command line. If the process is not
    found, the error is logged and the program exits with status 1.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('service', help='The service to stop')
    args = arg_parser.parse_args()

    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
    try:
        operator = MonitOperator()
        stop_with_retries = retry(
            max_retries=5, retry_on_exception=DEFAULT_RETRIES)(
                operator.send_command_sync)
        stop_with_retries(args.service, 'stop')
    except ProcessNotFound as error:
        logging.info(str(error))
        sys.exit(1)
Example #11
0
def main():
  """ Entry point for the appscale-admin command.

  Handles three subcommands: 'summary' prints a table of services and exits,
  'restart' posts a restart request over the management unix socket, and
  'serve' starts the AdminServer along with the management server.
  """
  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

  parser = argparse.ArgumentParser(
    prog='appscale-admin', description='Manages AppScale-related processes')
  commands = parser.add_subparsers(dest='command')
  commands.required = True

  serve = commands.add_parser(
    'serve', description='Starts the server that manages AppScale processes')
  serve.add_argument(
    '-p', '--port', type=int, default=constants.DEFAULT_PORT,
    help='The port to listen on')
  serve.add_argument(
    '-v', '--verbose', action='store_true', help='Output debug-level logging')

  commands.add_parser(
    'summary', description='Lists AppScale processes running on this machine')
  restart = commands.add_parser(
    'restart',
    description='Restart AppScale processes running on this machine')
  restart.add_argument(
    'service', nargs='+', help='The process or service ID to restart')

  args = parser.parse_args()
  if args.command == 'summary':
    rows = sorted(get_combined_services().items())
    print(tabulate(rows, headers=['Service', 'State']))
    sys.exit(0)

  if args.command == 'restart':
    quoted_socket = urlquote(ServiceManagerHandler.SOCKET_PATH, safe='')
    session = requests_unixsocket.Session()
    restart_url = 'http+unix://{}/'.format(quoted_socket)
    payload = {'command': 'restart', 'arg': [args.service]}
    session.post(restart_url, data=payload).raise_for_status()
    return

  # Everything below belongs to the 'serve' command.
  if args.verbose:
    logger.setLevel(logging.DEBUG)

  options.define('secret', appscale_info.get_secret())
  options.define('login_ip', appscale_info.get_login_ip())
  options.define('private_ip', appscale_info.get_private_ip())
  options.define('load_balancers', appscale_info.get_load_balancer_ips())

  acc = appscale_info.get_appcontroller_client()
  ua_client = UAClient(appscale_info.get_db_master_ip(), options.secret)
  zk_hosts = ','.join(appscale_info.get_zk_node_ips())
  zk_client = KazooClient(
    hosts=zk_hosts, connection_retry=ZK_PERSISTENT_RECONNECTS)
  zk_client.start()
  version_update_lock = zk_client.Lock(constants.VERSION_UPDATE_LOCK_NODE)
  thread_pool = ThreadPoolExecutor(4)
  monit_operator = MonitOperator()
  all_resources = dict(
    acc=acc,
    ua_client=ua_client,
    zk_client=zk_client,
    version_update_lock=version_update_lock,
    thread_pool=thread_pool)

  # The push worker manager is only created on taskqueue nodes.
  if options.private_ip in appscale_info.get_taskqueue_nodes():
    logger.info('Starting push worker manager')
    GlobalPushWorkerManager(zk_client, monit_operator)

  service_manager = ServiceManager(zk_client)
  service_manager.start()

  # The versions collection handler gets every shared resource except 'acc'.
  versions_resources = {
    'ua_client': ua_client, 'zk_client': zk_client,
    'version_update_lock': version_update_lock, 'thread_pool': thread_pool}
  zk_and_ua = {'zk_client': zk_client, 'ua_client': ua_client}
  routes = [
    ('/oauth/token', OAuthHandler, {'ua_client': ua_client}),
    ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)/versions', VersionsHandler,
     versions_resources),
    ('/v1/projects', ProjectsHandler, all_resources),
    ('/v1/projects/([a-z0-9-]+)', ProjectHandler, all_resources),
    ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)', ServiceHandler,
     all_resources),
    ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)/versions/([a-z0-9-]+)',
     VersionHandler, all_resources),
    ('/v1/apps/([a-z0-9-]+)/operations/([a-z0-9-]+)', OperationsHandler,
     {'ua_client': ua_client}),
    ('/api/cron/update', UpdateCronHandler,
     {'acc': acc, 'zk_client': zk_client, 'ua_client': ua_client}),
    ('/api/datastore/index/add', UpdateIndexesHandler, dict(zk_and_ua)),
    ('/api/queue/update', UpdateQueuesHandler, dict(zk_and_ua)),
  ]
  app = web.Application(routes)
  logger.info('Starting AdminServer')
  app.listen(args.port)

  # The management interface is served over a unix socket.
  management_routes = [
    ('/', ServiceManagerHandler, {'service_manager': service_manager})]
  management_server = HTTPServer(web.Application(management_routes))
  management_server.add_socket(
    bind_unix_socket(ServiceManagerHandler.SOCKET_PATH))

  IOLoop.current().start()
Example #12
0
# The number of seconds to wait before retrying to add routing.
ROUTING_RETRY_INTERVAL = 5

# Template for a pidfile path under /var/run/appscale. It is filled in with
# `project` and `port` format fields.
PIDFILE_TEMPLATE = os.path.join('/', 'var', 'run', 'appscale',
                                'app___{project}-{port}.pid')

# The number of seconds an instance is allowed to finish serving requests after
# it receives a shutdown signal.
MAX_INSTANCE_RESPONSE_TIME = 600

# A DeploymentConfig accessor. It is unset (None) at import time.
deployment_config = None

# An interface for working with Monit.
monit_operator = MonitOperator()


class BadConfigurationException(Exception):
    """ Raised when an application is configured incorrectly.

    The offending detail is kept on `value`, and str() of the exception is
    the repr of that value.
    """
    def __init__(self, value):
        self.value = value
        super(BadConfigurationException, self).__init__(value)

    def __str__(self):
        return '{!r}'.format(self.value)


class NoRedirection(urllib2.HTTPErrorProcessor):
    """ A url opener that does not automatically redirect. """
    def http_response(self, request, response):
def start_app(version_key, config):
    """ Starts an application instance on this machine.

    Validates the configuration, looks up the version's details, makes sure
    the project's API server and the revision's source are available, writes
    a Monit configuration for the instance, and asks Monit to start it.
    Routing for the new instance is added later through an IOLoop callback,
    and log rotation is set up at the end.

    This function yields futures, so it is presumably run as a Tornado
    coroutine (no decorator is visible here) -- TODO confirm.

    Args:
      version_key: A string specifying a version key.
      config: a dictionary that contains
        app_port: An integer specifying the port to use.
        login_server: The server address the AppServer will use for login
          urls.
    Raises:
      BadConfigurationException: If a required config entry is missing, the
        project ID is invalid, the version cannot be found, the runtime is
        unknown, or a Java version has 250MB or less of memory.
    """
    if 'app_port' not in config:
        raise BadConfigurationException('app_port is required')
    if 'login_server' not in config or not config['login_server']:
        raise BadConfigurationException('login_server is required')

    login_server = config['login_server']

    # The version key holds the project, service and version IDs joined by
    # VERSION_PATH_SEPARATOR.
    project_id, service_id, version_id = version_key.split(
        VERSION_PATH_SEPARATOR)

    if not misc.is_app_name_valid(project_id):
        raise BadConfigurationException(
            'Invalid project ID: {}'.format(project_id))

    try:
        service_manager = projects_manager[project_id][service_id]
        version_details = service_manager[version_id].version_details
    except KeyError:
        raise BadConfigurationException('Version not found')

    runtime = version_details['runtime']
    # NOTE(review): when 'envVariables' is present, env_vars is the same dict
    # object held by version_details, so the updates below modify it in place.
    env_vars = version_details.get('envVariables', {})
    runtime_params = deployment_config.get_config('runtime_parameters')
    max_memory = runtime_params.get('default_max_appserver_memory',
                                    DEFAULT_MAX_APPSERVER_MEMORY)
    # A recognized instance class overrides the default memory limit.
    if 'instanceClass' in version_details:
        max_memory = INSTANCE_CLASSES.get(version_details['instanceClass'],
                                          max_memory)

    revision_key = VERSION_PATH_SEPARATOR.join(
        [project_id, service_id, version_id,
         str(version_details['revision'])])
    source_archive = version_details['deployment']['zip']['sourceUrl']

    api_server_port = yield ensure_api_server(project_id)
    yield source_manager.ensure_source(revision_key, source_archive, runtime)

    logging.info('Starting {} application {}'.format(runtime, project_id))

    # NOTE(review): the PIDFILE_TEMPLATE visible in this file uses a {project}
    # field, so formatting it with `revision=` would raise KeyError -- confirm
    # which template definition this function is meant to use.
    pidfile = PIDFILE_TEMPLATE.format(revision=revision_key,
                                      port=config['app_port'])

    if runtime == constants.GO:
        env_vars['GOPATH'] = os.path.join(UNPACK_ROOT, revision_key, 'gopath')
        env_vars['GOROOT'] = os.path.join(GO_SDK, 'goroot')

    watch = ''.join([MONIT_INSTANCE_PREFIX, revision_key])
    # Python 2.7, Go and PHP versions all use the python27 start command.
    if runtime in (constants.PYTHON27, constants.GO, constants.PHP):
        start_cmd = create_python27_start_cmd(project_id, login_server,
                                              config['app_port'], pidfile,
                                              revision_key, api_server_port)
        env_vars.update(create_python_app_env(login_server, project_id))
    elif runtime == constants.JAVA:
        # Account for MaxPermSize (~170MB), the parent process (~50MB), and thread
        # stacks (~20MB).
        max_heap = max_memory - 250
        if max_heap <= 0:
            raise BadConfigurationException(
                'Memory for Java applications must be greater than 250MB')

        start_cmd = create_java_start_cmd(project_id, config['app_port'],
                                          login_server, max_heap, pidfile,
                                          revision_key, api_server_port)

        env_vars.update(create_java_app_env(project_id))
    else:
        raise BadConfigurationException('Unknown runtime {} for {}'.format(
            runtime, project_id))

    logging.info("Start command: " + str(start_cmd))
    logging.info("Environment variables: " + str(env_vars))

    monit_app_configuration.create_config_file(watch,
                                               start_cmd,
                                               pidfile,
                                               config['app_port'],
                                               env_vars,
                                               max_memory,
                                               options.syslog_server,
                                               check_port=True,
                                               kill_exceeded_memory=True)

    # The name sent to Monit is the watch name plus the instance's port.
    full_watch = '{}-{}'.format(watch, config['app_port'])

    # This local MonitOperator shadows any module-level monit_operator.
    monit_operator = MonitOperator()
    yield monit_operator.reload(thread_pool)
    yield monit_operator.send_command_retry_process(full_watch, 'start')

    # Make sure the version node exists.
    zk_client.ensure_path('/'.join([VERSION_REGISTRATION_NODE, version_key]))

    # Since we are going to wait, possibly for a long time for the
    # application to be ready, we do it later.
    IOLoop.current().spawn_callback(add_routing,
                                    Instance(revision_key, config['app_port']))

    # The dashboard project gets its own log size limit.
    if project_id == DASHBOARD_PROJECT_ID:
        log_size = DASHBOARD_LOG_SIZE
    else:
        log_size = APP_LOG_SIZE

    # A log rotation failure is logged but does not abort the start.
    if not setup_logrotate(project_id, log_size):
        logging.error(
            "Error while setting up log rotation for application: {}".format(
                project_id))