def _stats_from_node_async(self, node_ip, newer_than, include_lists):
  """ Gets a stats snapshot from the requested node.

  Uses the local stats source directly when the node is this machine,
  otherwise fetches the snapshot over the network.

  Args:
    node_ip: A string - IP of the node to collect stats from.
    newer_than: Lower bound on snapshot freshness, forwarded to the
      remote fetcher.
    include_lists: An instance of IncludeLists, forwarded to the
      remote fetcher.
  """
  local_ip = appscale_info.get_private_ip()
  if node_ip != local_ip:
    # Remote node: go through the async HTTP fetch.
    snapshot = yield self._fetch_remote_stats_async(
      node_ip, newer_than, include_lists)
  else:
    # Local node: read stats directly, no network round-trip needed.
    snapshot = self.local_stats_source.get_current()
  raise gen.Return(snapshot)
def get_current(self):
  """ Builds a list of ProcessStats.

  It parses output of `monit status` and generates ProcessStats object
  for each monitored service.

  Returns:
    An instance of ProcessesStatsSnapshot.
  """
  monit_status = subprocess.check_output('monit status', shell=True)
  processes_stats = []
  for match in MONIT_PROCESS_PATTERN.finditer(monit_status):
    monit_name = match.group('name')
    pid = int(match.group('pid'))
    service = find_service_by_monit_name(monit_name)
    private_ip = appscale_info.get_private_ip()
    try:
      stats = _process_stats(pid, service, monit_name, private_ip)
      processes_stats.append(stats)
    except psutil.Error as err:
      # A process may exit between `monit status` and the psutil lookup;
      # skip it rather than failing the whole snapshot.
      # logging.warn is deprecated; use logging.warning.
      logging.warning(
        u"Unable to get process stats for {monit_name} ({err})"
        .format(monit_name=monit_name, err=err))
  stats = ProcessesStatsSnapshot(
    utc_timestamp=time.mktime(datetime.utcnow().timetuple()),
    processes_stats=processes_stats
  )
  # Throttle debug output: dump the snapshot at most once per interval.
  # The timestamp is intentionally stored on the class so all instances
  # share the throttle.
  if time.time() - self.last_debug > LOCAL_STATS_DEBUG_INTERVAL:
    ProcessesStatsSource.last_debug = time.time()
    logging.debug(stats)
  return stats
def add_routing(app, port):
  """ Tells the AppController to begin routing traffic to an AppServer.

  Args:
    app: A string that contains the application ID.
    port: A string that contains the port that the AppServer listens on.
  """
  logging.info("Waiting for application {} on port {} to be active.".format(
    str(app), str(port)))

  if not wait_on_app(port):
    # In case the AppServer fails we let the AppController to detect it
    # and remove it if it still show in monit.
    logging.warning("AppServer did not come up in time, for {}:{}.".format(
      str(app), str(port)))
    return

  acc = appscale_info.get_appcontroller_client()
  appserver_ip = appscale_info.get_private_ip()

  # Keep retrying until the AppController accepts the routing request.
  while acc.add_routing_for_appserver(app, appserver_ip, port) == \
      AppControllerClient.NOT_READY:
    logging.info('AppController not yet ready to add routing.')
    time.sleep(ROUTING_RETRY_INTERVAL)

  logging.info('Successfully established routing for {} on port {}'.format(
    app, port))
def main():
  """ Starts a web service for handing datastore requests. """
  global datastore_access
  global server_node
  global zookeeper

  zookeeper_locations = appscale_info.get_zk_locations_string()

  # Command-line configuration: database backend, port, verbosity.
  parser = argparse.ArgumentParser()
  parser.add_argument('-t', '--type', choices=dbconstants.VALID_DATASTORES,
                      default=dbconstants.VALID_DATASTORES[0],
                      help='Database type')
  parser.add_argument('-p', '--port', type=int,
                      default=dbconstants.DEFAULT_PORT,
                      help='Datastore server port')
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Output debug-level logging')
  args = parser.parse_args()

  if args.verbose:
    logger.setLevel(logging.DEBUG)

  options.define('private_ip', appscale_info.get_private_ip())
  options.define('port', args.port)

  # ZooKeeper node under which this server registers itself.
  server_node = '{}/{}:{}'.format(DATASTORE_SERVERS_NODE, options.private_ip,
                                  options.port)

  datastore_batch = DatastoreFactory.getDatastore(
    args.type, log_level=logger.getEffectiveLevel())
  zookeeper = zktransaction.ZKTransaction(
    host=zookeeper_locations, db_access=datastore_batch,
    log_level=logger.getEffectiveLevel())

  zookeeper.handle.add_listener(zk_state_listener)
  zookeeper.handle.ensure_path(DATASTORE_SERVERS_NODE)
  # Since the client was started before adding the listener, make sure the
  # server node gets created.
  zk_state_listener(zookeeper.handle.state)
  zookeeper.handle.ChildrenWatch(DATASTORE_SERVERS_NODE,
                                 update_servers_watch)

  transaction_manager = TransactionManager(zookeeper.handle)
  datastore_access = DatastoreDistributed(
    datastore_batch, transaction_manager, zookeeper=zookeeper,
    log_level=logger.getEffectiveLevel())

  server = tornado.httpserver.HTTPServer(pb_application)
  server.listen(args.port)

  # Blocks until the IOLoop is stopped.
  IOLoop.current().start()
def main():
  """ Starts the AppManager service.

  Wires up ZooKeeper, configuration, source/instance managers and then
  runs the tornado IOLoop forever.
  """
  file_io.set_logging_format()
  logging.getLogger().setLevel(logging.INFO)

  zk_ips = appscale_info.get_zk_node_ips()
  zk_client = KazooClient(hosts=','.join(zk_ips))
  zk_client.start()

  deployment_config = DeploymentConfig(zk_client)
  projects_manager = GlobalProjectsManager(zk_client)
  thread_pool = ThreadPoolExecutor(MAX_BACKGROUND_WORKERS)
  source_manager = SourceManager(zk_client, thread_pool)
  source_manager.configure_automatic_fetch(projects_manager)
  monit_operator = MonitOperator()

  # Deployment-wide endpoints this node needs to know about.
  options.define('private_ip', appscale_info.get_private_ip())
  options.define('syslog_server', appscale_info.get_headnode_ip())
  options.define('db_proxy', appscale_info.get_db_proxy())
  options.define('tq_proxy', appscale_info.get_tq_proxy())
  options.define('secret', appscale_info.get_secret())

  routing_client = RoutingClient(zk_client, options.private_ip,
                                 options.secret)
  instance_manager = InstanceManager(
    zk_client, monit_operator, routing_client, projects_manager,
    deployment_config, source_manager, options.syslog_server, thread_pool,
    options.private_ip)
  instance_manager.start()

  logger.info('Starting AppManager')

  io_loop = IOLoop.current()
  # Discover already-running API servers before serving requests.
  io_loop.run_sync(instance_manager.populate_api_servers)
  io_loop.start()
def wait_on_app(port):
  """ Waits for the application hosted on this machine, on the given port,
      to respond to HTTP requests.

  Args:
    port: Port where app is hosted on the local machine
  Returns:
    True on success, False otherwise
  """
  retries = math.ceil(START_APP_TIMEOUT / BACKOFF_TIME)

  private_ip = appscale_info.get_private_ip()
  url = "http://" + private_ip + ":" + str(port) + FETCH_PATH
  while retries > 0:
    try:
      opener = urllib2.build_opener(NoRedirection)
      response = opener.open(url)
      if response.code != HTTP_OK:
        # NOTE(review): any HTTP response (even non-200) counts as "up" —
        # the warning is informational only. Confirm this is intended.
        logging.warning('{} returned {}. Headers: {}'.format(
          url, response.code, response.headers.headers))
      return True
    except IOError:
      # Connection refused/failed: the app is not up yet, so retry.
      retries -= 1

    time.sleep(BACKOFF_TIME)

  logging.error('Application did not come up on {} after {} seconds'.format(
    url, START_APP_TIMEOUT))
  return False
def __init__(self, zk_client, solr_zk_root, settings):
  """ Initializes SolrAPI object.
  Configures zookeeper watching of Solr live nodes.

  Args:
    zk_client: A zookeeper client used to watch Solr live nodes.
    solr_zk_root: A string - the Solr root node path in zookeeper.
    settings: Settings object stored for later use by this instance
      (exact type not visible here — confirm at call site).
  """
  self._zk_client = zk_client
  self._solr_zk_root = solr_zk_root
  self._settings = settings
  self._solr_live_nodes_list = []
  # Round-robin iterator over the live nodes list.
  self._solr_live_nodes_cycle = itertools.cycle(
    self._solr_live_nodes_list)
  self._local_solr = None
  self._private_ip = appscale_info.get_private_ip()
  # Keep the live nodes list updated as Solr nodes come and go.
  self._zk_client.ChildrenWatch(
    '{}/live_nodes'.format(self._solr_zk_root),
    self._update_live_nodes)
  self._collections_cache = set()
  self._broken_collections_cache = set()
  self._cache_timestamp = 0.0
  # Warm-up collections cache
  list_collections_sync = tornado_synchronous(self.list_collections)
  list_collections_sync()
def main():
  """ Starts the BlobServer.

  Parses command-line options, registers this server's location in
  ZooKeeper and serves blob requests over HTTP until the IOLoop stops.
  """
  global datastore_path
  global deployment_config

  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

  parser = argparse.ArgumentParser()
  # Fix: the original declared both default=DEFAULT_PORT and required=True,
  # which made the default dead code and forced callers to always pass -p.
  # Keeping the default and dropping `required` is backward-compatible.
  parser.add_argument('-p', '--port', type=int, default=DEFAULT_PORT,
                      help="The blobstore server's port")
  parser.add_argument('-d', '--datastore-path', required=True,
                      help='The location of the datastore server')
  args = parser.parse_args()

  datastore_path = args.datastore_path
  zk_ips = appscale_info.get_zk_node_ips()
  zk_client = KazooClient(hosts=','.join(zk_ips))
  zk_client.start()
  deployment_config = DeploymentConfig(zk_client)
  setup_env()

  # Advertise this server's location so load balancers can find it.
  register_location(zk_client, appscale_info.get_private_ip(), args.port)

  http_server = tornado.httpserver.HTTPServer(
    Application(), max_buffer_size=MAX_REQUEST_BUFF_SIZE, xheaders=True)

  http_server.listen(args.port)

  logger.info('Starting BlobServer on {}'.format(args.port))
  tornado.ioloop.IOLoop.instance().start()
def get_current():
  """ Builds a list of ProcessStats.

  It parses output of `monit status` and generates ProcessStats object
  for each monitored service.

  Returns:
    An instance of ProcessesStatsSnapshot.
  """
  start = time.time()
  monit_status = subprocess.check_output('monit status', shell=True)
  processes_stats = []
  for match in MONIT_PROCESS_PATTERN.finditer(monit_status):
    monit_name = match.group('name')
    pid = int(match.group('pid'))
    service = find_service_by_monit_name(monit_name)
    private_ip = appscale_info.get_private_ip()
    try:
      stats = _process_stats(pid, service, monit_name, private_ip)
      processes_stats.append(stats)
    except psutil.Error as err:
      # A process may disappear between `monit status` and the psutil
      # lookup; skip it rather than failing the whole snapshot.
      # logging.warn is deprecated; use logging.warning.
      logging.warning(
        u"Unable to get process stats for {monit_name} ({err})"
        .format(monit_name=monit_name, err=err))
  # NOTE(review): the field is named utc_timestamp but datetime.now()
  # returns local time — confirm whether utcnow() was intended.
  stats = ProcessesStatsSnapshot(
    utc_timestamp=time.mktime(datetime.now().timetuple()),
    processes_stats=processes_stats
  )
  logging.info("Prepared stats about {proc} processes in {elapsed:.1f}s."
               .format(proc=len(processes_stats),
                       elapsed=time.time() - start))
  return stats
def main():
  """ Starts the AppManager service.

  Wires up ZooKeeper, configuration, source/instance managers and then
  runs the tornado IOLoop forever.
  """
  file_io.set_logging_format()
  logging.getLogger().setLevel(logging.INFO)

  zk_ips = appscale_info.get_zk_node_ips()
  zk_client = KazooClient(hosts=','.join(zk_ips))
  zk_client.start()

  deployment_config = DeploymentConfig(zk_client)
  projects_manager = GlobalProjectsManager(zk_client)
  thread_pool = ThreadPoolExecutor(MAX_BACKGROUND_WORKERS)
  source_manager = SourceManager(zk_client, thread_pool)
  source_manager.configure_automatic_fetch(projects_manager)
  monit_operator = MonitOperator()

  # Deployment-wide endpoints this node needs to know about.
  options.define('private_ip', appscale_info.get_private_ip())
  options.define('syslog_server', appscale_info.get_headnode_ip())
  options.define('db_proxy', appscale_info.get_db_proxy())
  options.define('load_balancer_ip',
                 appscale_info.get_load_balancer_ips()[0])
  options.define('tq_proxy', appscale_info.get_tq_proxy())
  options.define('secret', appscale_info.get_secret())

  routing_client = RoutingClient(zk_client, options.private_ip,
                                 options.secret)
  instance_manager = InstanceManager(
    zk_client, monit_operator, routing_client, projects_manager,
    deployment_config, source_manager, options.syslog_server, thread_pool,
    options.private_ip)
  instance_manager.start()

  logger.info('Starting AppManager')

  io_loop = IOLoop.current()
  # Discover already-running API servers before serving requests.
  io_loop.run_sync(instance_manager.populate_api_servers)
  io_loop.start()
def main():
  """ Starts the AdminServer. """
  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

  parser = argparse.ArgumentParser()
  parser.add_argument('-p', '--port', type=int,
                      default=constants.DEFAULT_PORT,
                      help='The port to listen on')
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Output debug-level logging')
  args = parser.parse_args()
  if args.verbose:
    logging.getLogger().setLevel(logging.DEBUG)

  options.define('secret', appscale_info.get_secret())
  options.define('login_ip', appscale_info.get_login_ip())
  options.define('private_ip', appscale_info.get_private_ip())

  # Clients and shared resources handed to the request handlers.
  acc = appscale_info.get_appcontroller_client()
  ua_client = UAClient(appscale_info.get_db_master_ip(), options.secret)
  zk_client = KazooClient(
    hosts=','.join(appscale_info.get_zk_node_ips()),
    connection_retry=ZK_PERSISTENT_RECONNECTS)
  zk_client.start()
  version_update_lock = zk_client.Lock(constants.VERSION_UPDATE_LOCK_NODE)
  thread_pool = ThreadPoolExecutor(4)
  monit_operator = MonitOperator()
  all_resources = {
    'acc': acc,
    'ua_client': ua_client,
    'zk_client': zk_client,
    'version_update_lock': version_update_lock,
    'thread_pool': thread_pool
  }

  # Only taskqueue nodes run the push worker manager.
  if options.private_ip in appscale_info.get_taskqueue_nodes():
    logging.info('Starting push worker manager')
    GlobalPushWorkerManager(zk_client, monit_operator)

  app = web.Application([
    ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)/versions',
     VersionsHandler, all_resources),
    ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)/versions/([a-z0-9-]+)',
     VersionHandler, all_resources),
    ('/v1/apps/([a-z0-9-]+)/operations/([a-z0-9-]+)',
     OperationsHandler),
    ('/api/queue/update', UpdateQueuesHandler, {'zk_client': zk_client})
  ])
  logging.info('Starting AdminServer')
  app.listen(args.port)
  io_loop = IOLoop.current()
  io_loop.start()
def main():
  """ Main function for running the server. """
  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
  logger.info('Starting UAServer')

  global bindport
  global db
  global user_schema

  # Hand-rolled argv parsing.
  # NOTE(review): `ii += 1` below has no effect — `for ii in range(...)`
  # reassigns ii on the next iteration, so option values are also visited
  # as arguments. Harmless here since values fall into the `else` branch.
  for ii in range(1, len(sys.argv)):
    if sys.argv[ii] in ("-h", "--help"):
      usage()
      sys.exit()
    elif sys.argv[ii] in ('-t', "--type"):
      # type is ignored
      ii += 1
    elif sys.argv[ii] in ('-p', "--port"):
      bindport = int(sys.argv[ii + 1])
      ii += 1
    else:
      pass

  register_location(appscale_info.get_private_ip(), bindport)

  connect_to_postgres(zk_client)
  init_table(pg_connection_wrapper)

  ip = "0.0.0.0"
  server = SOAPpy.SOAPServer((ip, bindport))
  logger.info('Serving on {}'.format(bindport))

  # To debug this service, uncomment the 2 lines below.
  #server.config.dumpSOAPOut = 1
  #server.config.dumpSOAPIn = 1

  # Register soap functions.
  server.registerFunction(does_user_exist, funcName='does_user_exist')
  server.registerFunction(get_all_users, funcName='get_all_users')
  server.registerFunction(get_user_data, funcName='get_user_data')
  server.registerFunction(add_admin_for_app, funcName='add_admin_for_app')
  server.registerFunction(commit_new_user, funcName='commit_new_user')
  server.registerFunction(commit_new_token, funcName='commit_new_token')
  server.registerFunction(delete_user, funcName='delete_user')
  server.registerFunction(change_password, funcName='change_password')
  server.registerFunction(disable_user, funcName='disable_user')
  server.registerFunction(enable_user, funcName='enable_user')
  server.registerFunction(is_user_enabled, funcName='is_user_enabled')
  server.registerFunction(is_user_cloud_admin,
                          funcName='is_user_cloud_admin')
  server.registerFunction(set_cloud_admin_status,
                          funcName='set_cloud_admin_status')
  server.registerFunction(get_capabilities, funcName='get_capabilities')
  server.registerFunction(set_capabilities, funcName='set_capabilities')

  # NOTE(review): serve_forever() already blocks; the loop only restarts
  # it if it ever returns.
  while 1:
    server.serve_forever()
def main():
  """ Starts a web service for handing datastore requests. """
  global datastore_access
  global server_node
  global zookeeper

  zookeeper_locations = appscale_info.get_zk_locations_string()

  # Command-line configuration: database backend, port, verbosity.
  parser = argparse.ArgumentParser()
  parser.add_argument('-t', '--type', choices=dbconstants.VALID_DATASTORES,
                      default=dbconstants.VALID_DATASTORES[0],
                      help='Database type')
  parser.add_argument('-p', '--port', type=int,
                      default=dbconstants.DEFAULT_PORT,
                      help='Datastore server port')
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Output debug-level logging')
  args = parser.parse_args()

  if args.verbose:
    logging.getLogger('appscale').setLevel(logging.DEBUG)

  options.define('private_ip', appscale_info.get_private_ip())
  options.define('port', args.port)
  taskqueue_locations = get_load_balancer_ips()

  # ZooKeeper node under which this server registers itself.
  server_node = '{}/{}:{}'.format(DATASTORE_SERVERS_NODE, options.private_ip,
                                  options.port)

  datastore_batch = DatastoreFactory.getDatastore(
    args.type, log_level=logger.getEffectiveLevel())
  zookeeper = zktransaction.ZKTransaction(
    host=zookeeper_locations, db_access=datastore_batch,
    log_level=logger.getEffectiveLevel())

  zookeeper.handle.add_listener(zk_state_listener)
  zookeeper.handle.ensure_path(DATASTORE_SERVERS_NODE)
  # Since the client was started before adding the listener, make sure the
  # server node gets created.
  zk_state_listener(zookeeper.handle.state)
  zookeeper.handle.ChildrenWatch(DATASTORE_SERVERS_NODE,
                                 update_servers_watch)

  transaction_manager = TransactionManager(zookeeper.handle)
  datastore_access = DatastoreDistributed(
    datastore_batch, transaction_manager, zookeeper=zookeeper,
    log_level=logger.getEffectiveLevel(),
    taskqueue_locations=taskqueue_locations)
  index_manager = IndexManager(zookeeper.handle, datastore_access,
                               perform_admin=True)
  datastore_access.index_manager = index_manager

  server = tornado.httpserver.HTTPServer(pb_application)
  server.listen(args.port)

  # Blocks until the IOLoop is stopped.
  IOLoop.current().start()
def _stats_from_node_async(self, node_ip, max_age, include_lists):
  """ Gets a stats snapshot from the requested node.

  Remote nodes are queried over the network; for the local node the
  stats source is read directly, and any failure is converted into an
  error-string snapshot instead of propagating.

  Args:
    node_ip: A string - IP of the node to collect stats from.
    max_age: Max age of a cached snapshot, forwarded to the remote
      fetcher.
    include_lists: An instance of IncludeLists, forwarded to the
      remote fetcher.
  """
  if node_ip != appscale_info.get_private_ip():
    snapshot = yield self._fetch_remote_stats_async(
      node_ip, max_age, include_lists)
  else:
    try:
      snapshot = self.local_stats_source.get_current()
    except Exception as err:
      # Report the failure in place of the snapshot so one bad node
      # doesn't break the whole cluster view.
      snapshot = unicode(err)
      logging.exception(
        u"Failed to prepare local stats: {err}".format(err=err))
  raise gen.Return(snapshot)
def __init__(self, version_key, port, max_connections):
  """ Creates a new HAProxyAppVersion instance.

  Args:
    version_key: A string specifying a version.
    port: An integer specifying the listen port.
    max_connections: An integer specifying the max number of connections.
  """
  self.version_key = version_key
  self.port = port
  self.max_connections = max_connections
  # Server locations; starts empty and is populated later.
  self.servers = []
  # This machine's private IP address.
  self._private_ip = get_private_ip()
def create_python27_start_cmd(app_name, login_ip, port, pidfile):
  """ Creates the start command to run the python application server.

  Args:
    app_name: The name of the application to run
    login_ip: The public IP of this deployment
    port: The local port the application server will bind to
    pidfile: A string specifying the pidfile location.
  Returns:
    A string of the start command.
  """
  db_proxy = appscale_info.get_db_proxy()

  # Build the dev_appserver invocation; each entry becomes one or more
  # space-separated tokens in the final command line.
  cmd = [
    "/usr/bin/python2",
    "{}/AppServer/dev_appserver.py".format(constants.APPSCALE_HOME),
    "--port {}".format(port),
    "--admin_port {}".format(port + 10000),
    "--login_server {}".format(login_ip),
    "--skip_sdk_update_check",
    "--nginx_host {}".format(login_ip),
    "--require_indexes",
    "--enable_sendmail",
    "--xmpp_path {}".format(login_ip),
    "--php_executable_path={}".format(PHP_CGI_LOCATION),
    "--uaserver_path {}:{}".format(db_proxy, constants.UA_SERVER_PORT),
    "--datastore_path {}:{}".format(db_proxy, constants.DB_SERVER_PORT),
    "/var/apps/{}/app".format(app_name),
    "--host {}".format(appscale_info.get_private_ip()),
    "--admin_host {}".format(appscale_info.get_private_ip()),
    "--automatic_restart",
    "no",
    "--pidfile",
    pidfile,
  ]

  # Trusted applications get an extra flag.
  if app_name in TRUSTED_APPS:
    cmd.append(TRUSTED_FLAG)

  return ' '.join(cmd)
def _stats_from_node_async(self, node_ip, max_age, include_lists):
  """ Gets a stats snapshot from the requested node.

  Remote nodes are queried over the network; for the local node the
  stats source is read directly, and any failure is converted into an
  error-string snapshot instead of propagating.

  Args:
    node_ip: A string - IP of the node to collect stats from.
    max_age: Max age of a cached snapshot, forwarded to the remote
      fetcher.
    include_lists: An instance of IncludeLists, forwarded to the
      remote fetcher.
  """
  if node_ip == appscale_info.get_private_ip():
    try:
      snapshot = self.local_stats_source.get_current()
      # Some local sources are asynchronous; resolve the future if one
      # was returned.
      if isinstance(snapshot, gen.Future):
        snapshot = yield snapshot
    except Exception as err:
      # Report the failure in place of the snapshot so one bad node
      # doesn't break the whole cluster view.
      snapshot = unicode(err)
      logging.exception(
        u"Failed to prepare local stats: {err}".format(err=err))
  else:
    snapshot = yield self._fetch_remote_stats_async(
      node_ip, max_age, include_lists)
  raise gen.Return(snapshot)
def __init__(self, block_id, port, max_connections, servers=()):
  """ Creates a new HAProxyListenBlock instance.

  Args:
    block_id: A string specifying the name of the listen block.
    port: An integer specifying the listen port.
    max_connections: An integer specifying the max number of connections.
    servers: An iterable specifying server locations.
  """
  self.block_id = block_id
  self.port = port
  self.max_connections = max_connections
  self.servers = servers
  # This machine's private IP address.
  self._private_ip = get_private_ip()
def get_current():
  """ Method for building a list of ProcessStats.
  It parses output of `systemctl show` and generates ProcessStats object
  for each service of interest.

  Returns:
    An instance of ProcessesStatsSnapshot.
  """
  start = time.time()
  systemctl_show = subprocess.check_output(SYSTEMCTL_SHOW).decode()
  processes_stats = []
  private_ip = appscale_info.get_private_ip()
  for match in SYSTEMCTL_SHOW_PATTERN.finditer(systemctl_show):
    systemd_name = match.group('name')
    pid = int(match.group('pid'))
    service = find_service_by_external_name(systemd_name,
                                            default_mapper=systemd_mapper)
    # Skip systemd units that don't map to a known service.
    if service is None:
      continue
    try:
      stats = _process_stats(pid, service, systemd_name, private_ip)
      processes_stats.append(stats)
    except psutil.Error as err:
      # The process may have exited between listing and inspection.
      logger.warning(
        "Unable to get process stats for {name} ({err})".format(
          name=service.name, err=err))

  # Add processes managed by the ServiceManager.
  for server in ServiceManager.get_state():
    service = find_service_by_external_name(server.monit_name)
    try:
      stats = _process_stats(server.process.pid, service,
                             server.monit_name, private_ip)
      processes_stats.append(stats)
    except psutil.Error as error:
      logger.warning('Unable to get process stats for '
                     '{} ({})'.format(server, error))

  # NOTE(review): the field is named utc_timestamp but datetime.now()
  # returns local time — confirm whether utcnow() was intended.
  stats = ProcessesStatsSnapshot(utc_timestamp=time.mktime(
    datetime.now().timetuple()), processes_stats=processes_stats)
  logger.info(
    "Prepared stats about {proc} processes in {elapsed:.1f}s.".format(
      proc=len(processes_stats), elapsed=time.time() - start))
  return stats
def main():
  """ Main. Configures and starts the Hermes stats server. """
  parser = argparse.ArgumentParser()
  parser.add_argument(
    '-v', '--verbose', action='store_true',
    help='Output debug-level logging')
  args = parser.parse_args()

  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
  if args.verbose:
    logging.getLogger().setLevel(logging.DEBUG)

  options.define('secret', appscale_info.get_secret())

  signal.signal(signal.SIGTERM, signal_handler)
  signal.signal(signal.SIGINT, signal_handler)

  # Determine which roles this node plays to decide which routes to serve.
  my_ip = appscale_info.get_private_ip()
  is_master = (my_ip == appscale_info.get_headnode_ip())
  is_lb = (my_ip in appscale_info.get_load_balancer_ips())
  is_tq = (my_ip in appscale_info.get_taskqueue_nodes())
  is_db = (my_ip in appscale_info.get_db_ips())

  if is_master:
    global zk_client
    zk_client = KazooClient(
      hosts=','.join(appscale_info.get_zk_node_ips()),
      connection_retry=ZK_PERSISTENT_RECONNECTS)
    zk_client.start()
    # Start watching profiling configs in ZooKeeper
    stats_app.ProfilingManager(zk_client)

  app = tornado.web.Application(
    stats_app.get_local_stats_api_routes(is_lb, is_tq, is_db)
    + stats_app.get_cluster_stats_api_routes(is_master),
    debug=False
  )
  app.listen(constants.HERMES_PORT)

  # Fix: log readiness BEFORE entering the IOLoop. IOLoop.start() blocks
  # until the loop is stopped, so a statement after it only runs at
  # shutdown and the startup message was never emitted while serving.
  logger.info("Hermes is up and listening on port: {}."
              .format(constants.HERMES_PORT))

  # Start loop for accepting http requests.
  IOLoop.instance().start()
def main():
  """ Main. Configures and starts the Hermes stats server. """
  parser = argparse.ArgumentParser()
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Output debug-level logging')
  args = parser.parse_args()

  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
  if args.verbose:
    logging.getLogger().setLevel(logging.DEBUG)

  options.define('secret', appscale_info.get_secret())

  signal.signal(signal.SIGTERM, signal_handler)
  signal.signal(signal.SIGINT, signal_handler)

  # Determine which roles this node plays to decide which routes to serve.
  my_ip = appscale_info.get_private_ip()
  is_master = (my_ip == appscale_info.get_headnode_ip())
  is_lb = (my_ip in appscale_info.get_load_balancer_ips())
  is_tq = (my_ip in appscale_info.get_taskqueue_nodes())
  is_db = (my_ip in appscale_info.get_db_ips())

  if is_master:
    global zk_client
    zk_client = KazooClient(
      hosts=','.join(appscale_info.get_zk_node_ips()),
      connection_retry=ZK_PERSISTENT_RECONNECTS)
    zk_client.start()
    # Start watching profiling configs in ZooKeeper
    stats_app.ProfilingManager(zk_client)

  app = tornado.web.Application(
    stats_app.get_local_stats_api_routes(is_lb, is_tq, is_db)
    + stats_app.get_cluster_stats_api_routes(is_master),
    debug=False)
  app.listen(constants.HERMES_PORT)

  # Fix: log readiness BEFORE entering the IOLoop. IOLoop.start() blocks
  # until the loop is stopped, so a statement after it only runs at
  # shutdown and the startup message was never emitted while serving.
  logger.info("Hermes is up and listening on port: {}.".format(
    constants.HERMES_PORT))

  # Start loop for accepting http requests.
  IOLoop.instance().start()
def create_java_start_cmd(app_name, port, load_balancer_host, max_heap,
                          pidfile):
  """ Creates the start command to run the java application server.

  Args:
    app_name: The name of the application to run
    port: The local port the application server will bind to
    load_balancer_host: The host of the load balancer
    max_heap: An integer specifying the max heap size in MB.
    pidfile: A string specifying the pidfile location.
  Returns:
    A string of the start command.
  """
  db_proxy = appscale_info.get_db_proxy()
  tq_proxy = appscale_info.get_tq_proxy()
  java_start_script = os.path.join(
    constants.JAVA_APPSERVER, 'appengine-java-sdk-repacked', 'bin',
    'dev_appserver.sh')

  # The Java AppServer needs the NGINX_PORT flag set so that it will read
  # the local FS and see what port it's running on. The value doesn't
  # matter.
  cmd = [
    java_start_script,
    "--port={}".format(port),
    # Permit javax.email to open SMTP connections.
    "--jvm_flag=-Dsocket.permit_connect=true",
    '--jvm_flag=-Xmx{}m'.format(max_heap),
    '--jvm_flag=-Djava.security.egd=file:/dev/./urandom',
    "--disable_update_check",
    "--address={}".format(appscale_info.get_private_ip()),
    "--datastore_path={}".format(db_proxy),
    "--login_server={}".format(load_balancer_host),
    "--appscale_version=1",
    "--APP_NAME={}".format(app_name),
    "--NGINX_ADDRESS={}".format(load_balancer_host),
    "--TQ_PROXY={}".format(tq_proxy),
    "--pidfile={}".format(pidfile),
    # The app's exploded WAR directory (parent of WEB-INF).
    os.path.dirname(
      locate_dir("/var/apps/" + app_name + "/app/", "WEB-INF")),
  ]
  return ' '.join(cmd)
def get_current():
  """ Builds a list of ProcessStats.

  It parses output of `monit status` and generates ProcessStats object
  for each monitored service.

  Returns:
    An instance of ProcessesStatsSnapshot.
  """
  start = time.time()
  monit_status = subprocess.check_output('monit status', shell=True)
  processes_stats = []
  private_ip = appscale_info.get_private_ip()
  for match in MONIT_PROCESS_PATTERN.finditer(monit_status):
    monit_name = match.group('name')
    pid = int(match.group('pid'))
    service = find_service_by_monit_name(monit_name)
    try:
      stats = _process_stats(pid, service, monit_name, private_ip)
      processes_stats.append(stats)
    except psutil.Error as err:
      # A process may disappear between `monit status` and the psutil
      # lookup; skip it rather than failing the whole snapshot.
      # logging.warn is deprecated; use logging.warning (consistent with
      # the ServiceManager loop below).
      logging.warning(
        u"Unable to get process stats for {monit_name} ({err})"
        .format(monit_name=monit_name, err=err))

  # Add processes managed by the ServiceManager.
  for server in ServiceManager.get_state():
    service = find_service_by_monit_name(server.monit_name)
    try:
      stats = _process_stats(server.process.pid, service,
                             server.monit_name, private_ip)
      processes_stats.append(stats)
    except psutil.Error as error:
      logging.warning(u'Unable to get process stats for '
                      u'{} ({})'.format(server, error))

  # NOTE(review): the field is named utc_timestamp but datetime.now()
  # returns local time — confirm whether utcnow() was intended.
  stats = ProcessesStatsSnapshot(
    utc_timestamp=time.mktime(datetime.now().timetuple()),
    processes_stats=processes_stats
  )
  logging.info("Prepared stats about {proc} processes in {elapsed:.1f}s."
               .format(proc=len(processes_stats),
                       elapsed=time.time() - start))
  return stats
def main(): """ Main function which initializes and starts the tornado server. """ # Parse command line arguments parser = argparse.ArgumentParser(description='A taskqueue API server') parser.add_argument('--port', '-p', default='17447', help='TaskQueue server port') parser.add_argument('--verbose', action='store_true', help='Output debug-level logging') args = parser.parse_args() if args.verbose: logging.getLogger('appscale').setLevel(logging.DEBUG) # Configure zookeeper and db access zk_client = KazooClient(hosts=','.join(appscale_info.get_zk_node_ips()), connection_retry=ZK_PERSISTENT_RECONNECTS) zk_client.start() pg_connection_wrapper.start_postgres_dsn_watch(zk_client) register_location(zk_client, appscale_info.get_private_ip(), args.port) # Initialize tornado server task_queue = distributed_tq.DistributedTaskQueue(zk_client) tq_application = prepare_taskqueue_application(task_queue) # Automatically decompress incoming requests. server = httpserver.HTTPServer(tq_application, decompress_request=True) server.listen(args.port) # Make sure taskqueue shuts down gracefully when signal is received graceful_shutdown = prepare_graceful_shutdown(zk_client, server) signal.signal(signal.SIGTERM, graceful_shutdown) signal.signal(signal.SIGINT, graceful_shutdown) logger.info('Starting TaskQueue server on port {}'.format(args.port)) ioloop.IOLoop.current().start()
async def _stats_from_node_async(self, node_ip, max_age, include_lists):
  """ Fetches statistics from either local or remote node.

  Args:
    node_ip: A string - remote node IP.
    max_age: An int - max age of cached snapshot to use (in seconds).
    include_lists: An instance of IncludeLists.
  Returns:
    An instance of stats snapshot.
  Raises:
    RemoteHermesError: if the local stats source fails.
  """
  # Remote node: delegate to the async HTTP fetch and return immediately.
  if node_ip != appscale_info.get_private_ip():
    return await self._fetch_remote_stats_async(
      node_ip, max_age, include_lists
    )

  # Local node: read the stats source directly.
  try:
    snapshot = self.local_stats_source.get_current()
    # The source may be a coroutine; await its result when it is.
    if inspect.isawaitable(snapshot):
      snapshot = await snapshot
  except Exception as err:
    logger.error("Failed to prepare local stats: {err}".format(err=err))
    raise RemoteHermesError(str(err))
  return snapshot
def main():
  """ Main. Configures and starts the Hermes stats server (aiohttp). """
  parser = argparse.ArgumentParser()
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Output debug-level logging')
  parser.add_argument('--port', type=int, default=constants.HERMES_PORT,
                      help='The port to listen on')
  args = parser.parse_args()

  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
  if args.verbose:
    logging.getLogger('appscale').setLevel(logging.DEBUG)

  # Determine which roles this node plays to decide which routes to serve.
  my_ip = appscale_info.get_private_ip()
  is_master = (my_ip == appscale_info.get_headnode_ip())
  is_lb = (my_ip in appscale_info.get_load_balancer_ips())
  is_tq = (my_ip in appscale_info.get_taskqueue_nodes())
  is_db = (my_ip in appscale_info.get_db_ips())

  # All requests must present the deployment secret.
  app = web.Application(middlewares=[verify_secret_middleware])

  route_items = []
  route_items += get_local_stats_api_routes(is_lb, is_tq, is_db)
  route_items += get_cluster_stats_api_routes(is_master)
  for route, handler in route_items:
    app.router.add_get(route, handler)

  logger.info("Starting Hermes on port: {}.".format(args.port))
  # Blocks until the server shuts down.
  web.run_app(app, port=args.port, access_log=logger,
              access_log_format='%a "%r" %s %bB %Tfs "%{User-Agent}i"')
def identify_as_hoster(self, project_id, source_location):
  """ Marks this machine as having a version's source code.

  Args:
    project_id: A string specifying a project ID.
    source_location: A string specifying the location of the version's
      source archive.
  """
  private_ip = appscale_info.get_private_ip()
  hoster_node = '/apps/{}/{}'.format(project_id, private_ip)

  try:
    self.zk_client.create(hoster_node, str(source_location),
                          ephemeral=True, makepath=True)
  except NodeExistsError:
    # This machine was already registered; refresh the stored location.
    self.zk_client.set(hoster_node, str(source_location))

  # Remove other hosters that have old code.
  for hoster in self.zk_client.get_children('/apps/{}'.format(project_id)):
    if hoster == private_ip:
      continue
    self.zk_client.delete('/apps/{}/{}'.format(project_id, hoster))
def create_config_file(watch, start_cmd, pidfile, port=None, env_vars=None,
                       max_memory=None, syslog_server=None, check_port=False,
                       kill_exceeded_memory=False, log_tag=None):
  """ Writes a monit configuration file for a service.

  Args:
    watch: A string which identifies this process with monit.
    start_cmd: The start command to start the process.
    pidfile: The location of the pidfile that the process creates.
    port: An integer specifying the port for the process.
    env_vars: A dictionary specifying environment variables.
    max_memory: An integer that specifies the maximum amount of memory in
      megabytes that the process should use.
    syslog_server: The IP address of the remote syslog server to use.
    check_port: A boolean specifying that monit should check host and port.
    kill_exceeded_memory: A boolean indicating that a process should be
      killed (instead of terminated). This is used when the process exceeds
      its memory limit.
    log_tag: The tag to use with logging. Default is to derive from watch.
  """
  if check_port:
    assert port is not None, 'When using check_port, port must be defined'

  process_name = watch
  # The group is the watch name without the trailing version component.
  version_group = watch.rsplit(VERSION_PATH_SEPARATOR, 1)[0]
  if port is not None:
    process_name += '-{}'.format(port)

  # Render env vars as KEY="value" pairs for the `env` command below.
  env_vars_str = ''
  if env_vars is not None:
    for key in env_vars:
      env_vars_str += '{}="{}" '.format(key, env_vars[key])

  bash = find_executable('bash')
  start_stop_daemon = find_executable('start-stop-daemon')

  stop_instance = find_executable('appscale-stop-instance')
  # /usr/local/bin is not on the path in Trusty.
  stop_instance_script = os.path.join(
    '/', 'usr', 'local', 'bin', 'appscale-stop-instance')
  if stop_instance is None and os.path.isfile(stop_instance_script):
    stop_instance = stop_instance_script
  assert stop_instance is not None, 'Unable to find appscale-stop-instance'

  logfile = os.path.join(
    '/', 'var', 'log', 'appscale', '{}.log'.format(process_name))

  if not log_tag:
    log_tag = version_group

  # Without a syslog server, append output to the local logfile; with one,
  # tee to the local file and forward via logger over UDP port 514.
  if syslog_server is None:
    bash_exec = 'exec env {vars} {start_cmd} >> {log} 2>&1'.format(
      vars=env_vars_str, start_cmd=start_cmd, log=logfile)
  else:
    bash_exec = (
      'exec env {vars} {start_cmd} 2>&1 | tee -a {log} | '
      'logger -t {log_tag} -u /tmp/ignored -n {syslog_server} -P 514'
    ).format(vars=env_vars_str, start_cmd=start_cmd, log=logfile,
             log_tag=log_tag, syslog_server=syslog_server)

  # The `unset "${!MONIT_@}"` strips monit's own env vars from the child.
  start_line = ' '.join([
    start_stop_daemon,
    '--start',
    '--background',
    '--pidfile', pidfile,
    '--startas',
    "{} -- -c 'unset \"${{!MONIT_@}}\"; {}'".format(bash, bash_exec)
  ])
  stop_line = '{} --watch {}'.format(stop_instance, process_name)

  with open(TEMPLATE_LOCATION) as template:
    output = template.read()
    output = output.format(
      process_name=process_name,
      match_clause='PIDFILE "{}"'.format(pidfile),
      group=version_group, start_line=start_line, stop_line=stop_line)

  if max_memory is not None:
    if kill_exceeded_memory:
      action = 'exec "{} --watch {} --force"'.format(stop_instance,
                                                     process_name)
    else:
      action = 'restart'

    output += ' if totalmem > {} MB for 10 cycles then {}\n'.format(
      max_memory, action)

  if check_port:
    private_ip = appscale_info.get_private_ip()
    output += ' if failed host {} port {} for 3 cycles then restart\n'.format(
      private_ip, port)

  config_file = os.path.join(MONIT_CONFIG_DIR,
                             'appscale-{}.cfg'.format(process_name))
  file_io.write(config_file, output)

  return
def create_config_file(watch, start_cmd, pidfile, port=None, env_vars=None,
                       max_memory=None, syslog_server=None, check_port=False):
  """ Writes a monit configuration file for a service.

  Args:
    watch: A string which identifies this process with monit.
    start_cmd: The start command to start the process.
    pidfile: The location of the pidfile that the process creates.
    port: An integer specifying the port for the process.
    env_vars: A dictionary specifying environment variables.
    max_memory: An integer that specifies the maximum amount of memory in
      megabytes that the process should use.
    syslog_server: The IP address of the remote syslog server to use.
    check_port: A boolean specifying that monit should check host and port.
  """
  if check_port:
    # NOTE: assert is stripped under `python -O`; the port check relies on it.
    assert port is not None, 'When using check_port, port must be defined'

  process_name = watch
  if port is not None:
    process_name += '-{}'.format(port)

  # Double-quote each value so env vars containing spaces or shell
  # metacharacters survive the generated `env` command line. (This matches
  # the quoting used by the newer create_config_file variant; the previous
  # unquoted '{}={} ' form broke for values with spaces.)
  env_vars_str = ''
  if env_vars is not None:
    for key in env_vars:
      env_vars_str += '{}="{}" '.format(key, env_vars[key])

  bash = find_executable('bash')
  start_stop_daemon = find_executable('start-stop-daemon')

  logfile = os.path.join('/', 'var', 'log', 'appscale',
                         '{}.log'.format(process_name))

  # Ask the process to exit (TERM), escalate to KILL per the retry schedule,
  # and remove the stale pidfile once it is gone.
  stop = ('start-stop-daemon --stop --pidfile {0} --retry=TERM/20/KILL/5 && '
          'rm {0}'.format(pidfile))

  if syslog_server is None:
    # Local logging only: append the service's output to its logfile.
    bash_exec = 'exec env {vars} {start_cmd} >> {log} 2>&1'.format(
      vars=env_vars_str, start_cmd=start_cmd, log=logfile)
  else:
    # Tee output into the local logfile and forward it to the remote syslog
    # server (UDP port 514) tagged with the watch name.
    bash_exec = 'exec env {vars} {start_cmd} 2>&1 | tee -a {log} | '\
                'logger -t {watch} -u /tmp/ignored -n {syslog_server} -P 514'.\
      format(vars=env_vars_str, start_cmd=start_cmd, log=logfile,
             watch=watch, syslog_server=syslog_server)

  start_line = ' '.join([
    start_stop_daemon,
    '--start',
    '--background',
    '--pidfile', pidfile,
    '--startas', "{} -- -c '{}'".format(bash, bash_exec)
  ])
  stop_line = "{} -c '{}'".format(bash, stop)

  with open(TEMPLATE_LOCATION) as template:
    output = template.read()
    output = output.format(
      process_name=process_name,
      match_clause='PIDFILE "{}"'.format(pidfile),
      group=watch,
      start_line=start_line,
      stop_line=stop_line)

  if max_memory is not None:
    output += ' if totalmem > {} MB for 10 cycles then restart\n'.format(
      max_memory)

  if check_port:
    private_ip = appscale_info.get_private_ip()
    output += ' if failed host {} port {} then restart\n'.format(
      private_ip, port)

  config_file = os.path.join(MONIT_CONFIG_DIR,
                             'appscale-{}.cfg'.format(process_name))
  file_io.write(config_file, output)

  return
def start_app(config):
  """ Starts a Google App Engine application on this machine. It
      will start it up and then proceed to fetch the main page.

  Args:
    config: a dictionary that contains
       app_name: Name of the application to start
       app_port: Port to start on
       language: What language the app is written in
       login_ip: Public ip of deployment
       env_vars: A dict of environment variables that should be passed to the
        app.
       max_memory: An int that names the maximum amount of memory that this
        App Engine app is allowed to consume before being restarted.
       syslog_server: The IP of the syslog server to send the application
        logs to. Usually it's the login private IP.
  Returns:
    PID of process on success, -1 otherwise
  """
  config = convert_config_from_json(config)
  if config is None:
    logging.error("Invalid configuration for application")
    return BAD_PID

  if not misc.is_app_name_valid(config['app_name']):
    logging.error("Invalid app name for application: " + config['app_name'])
    return BAD_PID
  logging.info("Starting %s application %s" % (
    config['language'], config['app_name']))

  env_vars = config['env_vars']

  if config['language'] == constants.GO:
    # Go apps run through the Go SDK and need GOPATH/GOROOT in their
    # environment.
    env_vars['GOPATH'] = os.path.join('/var', 'apps', config['app_name'],
                                      'gopath')
    env_vars['GOROOT'] = os.path.join(GO_SDK, 'goroot')

  watch = "app___" + config['app_name']
  match_cmd = ""

  if config['language'] == constants.PYTHON27 or \
      config['language'] == constants.GO or \
      config['language'] == constants.PHP:
    # Python, Go and PHP all use the python27 runtime wrapper.
    start_cmd = create_python27_start_cmd(
      config['app_name'],
      config['login_ip'],
      config['app_port'])
    stop_cmd = create_python27_stop_cmd(config['app_port'])
    env_vars.update(
      create_python_app_env(config['login_ip'], config['app_name']))
  elif config['language'] == constants.JAVA:
    remove_conflicting_jars(config['app_name'])
    copy_successful = copy_modified_jars(config['app_name'])
    if not copy_successful:
      return BAD_PID

    # Account for MaxPermSize (~170MB), the parent process (~50MB), and
    # thread stacks (~20MB).
    max_heap = config['max_memory'] - 250
    if max_heap <= 0:
      return BAD_PID
    start_cmd = create_java_start_cmd(
      config['app_name'],
      config['app_port'],
      config['login_ip'],
      max_heap)
    # Regex used by monit to match the running JVM for this port/app.
    match_cmd = "java -ea -cp.*--port={}.*{}".format(
      str(config['app_port']),
      os.path.dirname(locate_dir("/var/apps/" + config['app_name'] + "/app/",
                                 "WEB-INF")))
    stop_cmd = create_java_stop_cmd(config['app_port'])
    env_vars.update(create_java_app_env(config['app_name']))
  else:
    logging.error("Unknown application language %s for appname %s" \
                  % (config['language'], config['app_name']))
    return BAD_PID

  logging.info("Start command: " + str(start_cmd))
  logging.info("Stop command: " + str(stop_cmd))
  logging.info("Environment variables: " + str(env_vars))

  # Set the syslog_server is specified.
  syslog_server = ""
  if 'syslog_server' in config:
    syslog_server = config['syslog_server']
  # NOTE(review): this positional argument list does not match either
  # create_config_file signature visible in this file (where pidfile is the
  # third parameter) -- confirm against monit_app_configuration's actual API.
  monit_app_configuration.create_config_file(
    str(watch),
    str(start_cmd),
    str(stop_cmd),
    [config['app_port']],
    env_vars,
    config['max_memory'],
    syslog_server,
    appscale_info.get_private_ip(),
    match_cmd=match_cmd)

  # We want to tell monit to start the single process instead of the
  # group, since monit can get slow if there are quite a few processes in
  # the same group.
  full_watch = "{}-{}".format(str(watch), str(config['app_port']))
  if not monit_interface.start(full_watch, is_group=False):
    logging.warning("Monit was unable to start {}:{}".format(
      str(config['app_name']), config['app_port']))
    return BAD_PID

  # Since we are going to wait, possibly for a long time for the
  # application to be ready, we do it in a thread.
  threading.Thread(target=add_routing,
                   args=(config['app_name'], config['app_port'])).start()

  # Choose the logrotate size: explicit config wins, then the dashboard's
  # special size, then the default app size.
  if 'log_size' in config.keys():
    log_size = config['log_size']
  else:
    if config['app_name'] == APPSCALE_DASHBOARD_ID:
      log_size = DASHBOARD_LOG_SIZE
    else:
      log_size = APP_LOG_SIZE

  # Logrotate failure is logged but does not fail the start.
  if not setup_logrotate(config['app_name'], watch, log_size):
    logging.error(
      "Error while setting up log rotation for application: {}".format(
        config['app_name']))

  return 0
def main():
  """ Starts the AdminServer.

  Parses the 'serve', 'summary' and 'restart' subcommands; 'summary' and
  'restart' exit early, while 'serve' brings up the admin HTTP server plus a
  management server on a unix socket.
  """
  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

  parser = argparse.ArgumentParser(
    prog='appscale-admin', description='Manages AppScale-related processes')
  subparsers = parser.add_subparsers(dest='command')
  subparsers.required = True

  serve_parser = subparsers.add_parser(
    'serve', description='Starts the server that manages AppScale processes')
  serve_parser.add_argument(
    '-p', '--port', type=int, default=constants.DEFAULT_PORT,
    help='The port to listen on')
  serve_parser.add_argument(
    '-v', '--verbose', action='store_true', help='Output debug-level logging')

  subparsers.add_parser(
    'summary', description='Lists AppScale processes running on this machine')

  restart_parser = subparsers.add_parser(
    'restart',
    description='Restart AppScale processes running on this machine')
  restart_parser.add_argument('service', nargs='+',
                              help='The process or service ID to restart')

  args = parser.parse_args()

  if args.command == 'summary':
    table = sorted(list(get_combined_services().items()))
    print(tabulate(table, headers=['Service', 'State']))
    sys.exit(0)

  if args.command == 'restart':
    # The restart request goes to the management server over its unix socket.
    socket_path = urlquote(ServiceManagerHandler.SOCKET_PATH, safe='')
    session = requests_unixsocket.Session()
    # NOTE(review): args.service is already a list (nargs='+'), so this posts
    # a nested list as 'arg' -- confirm ServiceManagerHandler expects that
    # shape rather than data={'arg': args.service}.
    response = session.post(
      'http+unix://{}/'.format(socket_path),
      data={'command': 'restart', 'arg': [args.service]})
    response.raise_for_status()
    return

  if args.verbose:
    logger.setLevel(logging.DEBUG)

  options.define('secret', appscale_info.get_secret())
  options.define('login_ip', appscale_info.get_login_ip())
  options.define('private_ip', appscale_info.get_private_ip())
  options.define('load_balancers', appscale_info.get_load_balancer_ips())

  acc = appscale_info.get_appcontroller_client()
  ua_client = UAClient(appscale_info.get_db_master_ip(), options.secret)
  zk_client = KazooClient(
    hosts=','.join(appscale_info.get_zk_node_ips()),
    connection_retry=ZK_PERSISTENT_RECONNECTS)
  zk_client.start()
  version_update_lock = zk_client.Lock(constants.VERSION_UPDATE_LOCK_NODE)
  thread_pool = ThreadPoolExecutor(4)
  monit_operator = MonitOperator()
  # Shared handler dependencies injected into most routes below.
  all_resources = {
    'acc': acc,
    'ua_client': ua_client,
    'zk_client': zk_client,
    'version_update_lock': version_update_lock,
    'thread_pool': thread_pool
  }

  if options.private_ip in appscale_info.get_taskqueue_nodes():
    logger.info('Starting push worker manager')
    GlobalPushWorkerManager(zk_client, monit_operator)

  service_manager = ServiceManager(zk_client)
  service_manager.start()

  app = web.Application([
    ('/oauth/token', OAuthHandler, {'ua_client': ua_client}),
    ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)/versions', VersionsHandler,
     {'ua_client': ua_client, 'zk_client': zk_client,
      'version_update_lock': version_update_lock, 'thread_pool': thread_pool}),
    ('/v1/projects', ProjectsHandler, all_resources),
    ('/v1/projects/([a-z0-9-]+)', ProjectHandler, all_resources),
    ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)', ServiceHandler,
     all_resources),
    ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)/versions/([a-z0-9-]+)',
     VersionHandler, all_resources),
    ('/v1/apps/([a-z0-9-]+)/operations/([a-z0-9-]+)', OperationsHandler,
     {'ua_client': ua_client}),
    ('/api/cron/update', UpdateCronHandler,
     {'acc': acc, 'zk_client': zk_client, 'ua_client': ua_client}),
    ('/api/datastore/index/add', UpdateIndexesHandler,
     {'zk_client': zk_client, 'ua_client': ua_client}),
    ('/api/queue/update', UpdateQueuesHandler,
     {'zk_client': zk_client, 'ua_client': ua_client})
  ])
  logger.info('Starting AdminServer')
  app.listen(args.port)

  # Management server listens on a unix socket so the 'restart' subcommand
  # (above) can reach it without a TCP port.
  management_app = web.Application([
    ('/', ServiceManagerHandler, {'service_manager': service_manager})])
  management_server = HTTPServer(management_app)
  management_socket = bind_unix_socket(ServiceManagerHandler.SOCKET_PATH)
  management_server.add_socket(management_socket)

  io_loop = IOLoop.current()
  io_loop.start()
# MAIN ################################ if __name__ == "__main__": file_io.set_logging_format() logging.getLogger().setLevel(logging.INFO) zk_ips = appscale_info.get_zk_node_ips() zk_client = KazooClient(hosts=','.join(zk_ips)) zk_client.start() deployment_config = DeploymentConfig(zk_client) projects_manager = GlobalProjectsManager(zk_client) thread_pool = ThreadPoolExecutor(MAX_BACKGROUND_WORKERS) source_manager = SourceManager(zk_client, thread_pool) options.define('private_ip', appscale_info.get_private_ip()) options.define('syslog_server', appscale_info.get_headnode_ip()) options.define('db_proxy', appscale_info.get_db_proxy()) options.define('tq_proxy', appscale_info.get_tq_proxy()) app = tornado.web.Application([ ('/versions/([a-z0-9-_]+)', VersionHandler), ('/versions/([a-z0-9-_]+)/([0-9-]+)', InstanceHandler) ]) app.listen(constants.APP_MANAGER_PORT) logging.info('Starting AppManager on {}'.format(constants.APP_MANAGER_PORT)) io_loop = IOLoop.current() io_loop.run_sync(populate_api_servers) io_loop.start()
def main():
  """ Starts a web service for handing datastore requests.

  Parses CLI flags, connects to ZooKeeper, builds either a Cassandra-backed
  DatastoreDistributed or an FDBDatastore, registers this server under
  DATASTORE_SERVERS_NODE, and serves the protobuf application forever.
  """

  global datastore_access
  global server_node
  global zk_client

  zookeeper_locations = appscale_info.get_zk_locations_string()
  if not zookeeper_locations:
    zookeeper_locations = 'localhost:2181'

  parser = argparse.ArgumentParser()
  parser.add_argument('-t', '--type', choices=dbconstants.VALID_DATASTORES,
                      default=dbconstants.VALID_DATASTORES[0],
                      help='Database type')
  parser.add_argument('--fdb-clusterfile', default=None,
                      help='Location of FoundationDB clusterfile')
  parser.add_argument('-p', '--port', type=int,
                      default=dbconstants.DEFAULT_PORT,
                      help='Datastore server port')
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Output debug-level logging')
  args = parser.parse_args()

  if args.verbose:
    logging.getLogger('appscale').setLevel(logging.DEBUG)

  options.define('private_ip', appscale_info.get_private_ip())
  options.define('port', args.port)
  taskqueue_locations = get_load_balancer_ips()

  # ZK node that advertises this server to clients watching the server list.
  server_node = '{}/{}:{}'.format(DATASTORE_SERVERS_NODE, options.private_ip,
                                  options.port)

  retry_policy = KazooRetry(max_tries=5)
  zk_client = kazoo.client.KazooClient(
    hosts=zookeeper_locations, connection_retry=ZK_PERSISTENT_RECONNECTS,
    command_retry=retry_policy)
  zk_client.start()

  if args.type == 'cassandra':
    datastore_batch = DatastoreFactory.getDatastore(
      args.type, log_level=logger.getEffectiveLevel())
    zookeeper = zktransaction.ZKTransaction(
      zk_client=zk_client, db_access=datastore_batch,
      log_level=logger.getEffectiveLevel())
    transaction_manager = TransactionManager(zk_client)
    datastore_access = DatastoreDistributed(
      datastore_batch, transaction_manager, zookeeper=zookeeper,
      log_level=logger.getEffectiveLevel(),
      taskqueue_locations=taskqueue_locations)
  else:
    # Imported lazily so the Cassandra path does not require the FDB client.
    from appscale.datastore.fdb.fdb_datastore import FDBDatastore
    clusterfile_path = args.fdb_clusterfile
    if not clusterfile_path:
      try:
        # Materialize the clusterfile stored in ZooKeeper to a local path.
        clusterfile_content = zk_client.get(FDB_CLUSTERFILE_NODE)[0]
        clusterfile_path = '/run/appscale/appscale-datastore-fdb.cluster'
        with open(clusterfile_path, 'w') as clusterfile:
          clusterfile.write(clusterfile_content)
      except NoNodeError:
        # Fixed message: the original implicit string concatenation produced
        # "exists,FDB" with no separating space.
        logger.warning(
          'Neither --fdb-clusterfile was specified nor {} ZK node exists, '
          'FDB client will try to find clusterfile in one of default '
          'locations'.format(FDB_CLUSTERFILE_NODE))
    # clusterfile_path may still be None here; the FDB client then falls
    # back to its own default clusterfile locations.
    datastore_access = FDBDatastore()
    datastore_access.start(clusterfile_path)

  zk_client.add_listener(zk_state_listener)
  zk_client.ensure_path(DATASTORE_SERVERS_NODE)
  # Since the client was started before adding the listener, make sure the
  # server node gets created.
  zk_state_listener(zk_client.state)
  zk_client.ChildrenWatch(DATASTORE_SERVERS_NODE, update_servers_watch)

  if args.type == 'cassandra':
    index_manager = IndexManager(zk_client, datastore_access,
                                 perform_admin=True)
    datastore_access.index_manager = index_manager

  server = tornado.httpserver.HTTPServer(pb_application)
  server.listen(args.port)
  IOLoop.current().start()
def main():
  """ Main.

  Hermes entry point: configures logging and signals, sets up master-only
  periodic tasks and ZooKeeper watches when this node is the headnode, then
  serves the stats API routes.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
    '-v', '--verbose', action='store_true', help='Output debug-level logging')
  args = parser.parse_args()

  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
  if args.verbose:
    logging.getLogger().setLevel(logging.DEBUG)

  options.define('secret', appscale_info.get_secret())

  signal.signal(signal.SIGTERM, signal_handler)
  signal.signal(signal.SIGINT, signal_handler)

  my_ip = appscale_info.get_private_ip()
  is_master = (my_ip == appscale_info.get_headnode_ip())
  is_lb = (my_ip in appscale_info.get_load_balancer_ips())
  is_tq = (my_ip in appscale_info.get_taskqueue_nodes())

  if is_master:
    # Periodically check with the portal for new tasks.
    # Note: Currently, any active handlers from the tornado app will block
    # polling until they complete.
    PeriodicCallback(poll, constants.POLLING_INTERVAL).start()

    # Only master Hermes node handles /do_task route
    task_route = ('/do_task', TaskHandler)

    global zk_client
    zk_client = KazooClient(
      hosts=','.join(appscale_info.get_zk_node_ips()),
      connection_retry=ZK_PERSISTENT_RECONNECTS)
    zk_client.start()
    # Start watching profiling configs in ZooKeeper
    stats_app.ProfilingManager(zk_client)

    # Periodically checks if the deployment is registered and uploads the
    # appscalesensor app for registered deployments.
    sensor_deployer = SensorDeployer(zk_client)
    PeriodicCallback(sensor_deployer.deploy,
                     constants.UPLOAD_SENSOR_INTERVAL).start()
  else:
    task_route = ('/do_task', Respond404Handler,
                  dict(reason='Hermes slaves do not manage tasks from Portal'))

  app = tornado.web.Application([
      ("/", MainHandler),
      task_route,
    ]
    + stats_app.get_local_stats_api_routes(is_lb, is_tq)
    + stats_app.get_cluster_stats_api_routes(is_master),
    debug=False
  )
  app.listen(constants.HERMES_PORT)

  # Log before entering the IOLoop: IOLoop.start() blocks until the loop is
  # stopped, so a log statement placed after it would never run while the
  # server is serving requests (this was the original bug).
  logging.info("Hermes is up and listening on port: {}."
               .format(constants.HERMES_PORT))

  # Start loop for accepting http requests.
  IOLoop.instance().start()
def main():
  """ Main.

  Hermes entry point: configures logging and signals, sets up master-only
  periodic tasks and ZooKeeper watches when this node is the headnode, then
  serves the stats API routes.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Output debug-level logging')
  args = parser.parse_args()

  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
  if args.verbose:
    logging.getLogger().setLevel(logging.DEBUG)

  options.define('secret', appscale_info.get_secret())

  signal.signal(signal.SIGTERM, signal_handler)
  signal.signal(signal.SIGINT, signal_handler)

  my_ip = appscale_info.get_private_ip()
  is_master = (my_ip == appscale_info.get_headnode_ip())
  is_lb = (my_ip in appscale_info.get_load_balancer_ips())
  is_tq = (my_ip in appscale_info.get_taskqueue_nodes())

  if is_master:
    # Periodically check with the portal for new tasks.
    # Note: Currently, any active handlers from the tornado app will block
    # polling until they complete.
    PeriodicCallback(poll, constants.POLLING_INTERVAL).start()

    # Only master Hermes node handles /do_task route
    task_route = ('/do_task', TaskHandler)

    global zk_client
    zk_client = KazooClient(hosts=','.join(
      appscale_info.get_zk_node_ips()),
      connection_retry=ZK_PERSISTENT_RECONNECTS)
    zk_client.start()
    # Start watching profiling configs in ZooKeeper
    stats_app.ProfilingManager(zk_client)

    # Periodically checks if the deployment is registered and uploads the
    # appscalesensor app for registered deployments.
    sensor_deployer = SensorDeployer(zk_client)
    PeriodicCallback(sensor_deployer.deploy,
                     constants.UPLOAD_SENSOR_INTERVAL).start()
  else:
    task_route = (
      '/do_task', Respond404Handler,
      dict(reason='Hermes slaves do not manage tasks from Portal'))

  app = tornado.web.Application(
    [
      ("/", MainHandler),
      task_route,
    ]
    + stats_app.get_local_stats_api_routes(is_lb, is_tq)
    + stats_app.get_cluster_stats_api_routes(is_master),
    debug=False)
  app.listen(constants.HERMES_PORT)

  # Log before entering the IOLoop: IOLoop.start() blocks until the loop is
  # stopped, so a log statement placed after it would never run while the
  # server is serving requests (this was the original bug).
  logging.info("Hermes is up and listening on port: {}.".format(
    constants.HERMES_PORT))

  # Start loop for accepting http requests.
  IOLoop.instance().start()
def main():
  """ Starts the AdminServer.

  Parses the 'serve' and 'summary' subcommands; 'summary' prints the service
  table and exits, while 'serve' brings up the admin HTTP server.
  """
  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

  parser = argparse.ArgumentParser(
    prog='appscale-admin', description='Manages AppScale-related processes')
  subparsers = parser.add_subparsers(dest='command')
  subparsers.required = True

  serve_parser = subparsers.add_parser(
    'serve', description='Starts the server that manages AppScale processes')
  serve_parser.add_argument(
    '-p', '--port', type=int, default=constants.DEFAULT_PORT,
    help='The port to listen on')
  serve_parser.add_argument(
    '-v', '--verbose', action='store_true', help='Output debug-level logging')

  subparsers.add_parser(
    'summary', description='Lists AppScale processes running on this machine')

  args = parser.parse_args()

  if args.command == 'summary':
    table = sorted(list(get_combined_services().items()))
    print(tabulate(table, headers=['Service', 'State']))
    sys.exit(0)

  if args.verbose:
    logger.setLevel(logging.DEBUG)

  options.define('secret', appscale_info.get_secret())
  options.define('login_ip', appscale_info.get_login_ip())
  options.define('private_ip', appscale_info.get_private_ip())
  options.define('load_balancers', appscale_info.get_load_balancer_ips())

  acc = appscale_info.get_appcontroller_client()
  ua_client = UAClient(appscale_info.get_db_master_ip(), options.secret)
  zk_client = KazooClient(
    hosts=','.join(appscale_info.get_zk_node_ips()),
    connection_retry=ZK_PERSISTENT_RECONNECTS)
  zk_client.start()
  version_update_lock = zk_client.Lock(constants.VERSION_UPDATE_LOCK_NODE)
  thread_pool = ThreadPoolExecutor(4)
  monit_operator = MonitOperator()
  # Shared handler dependencies injected into most routes below.
  all_resources = {
    'acc': acc,
    'ua_client': ua_client,
    'zk_client': zk_client,
    'version_update_lock': version_update_lock,
    'thread_pool': thread_pool
  }

  if options.private_ip in appscale_info.get_taskqueue_nodes():
    logger.info('Starting push worker manager')
    GlobalPushWorkerManager(zk_client, monit_operator)

  service_manager = ServiceManager(zk_client)
  service_manager.start()

  app = web.Application([
    ('/oauth/token', OAuthHandler, {'ua_client': ua_client}),
    ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)/versions', VersionsHandler,
     all_resources),
    ('/v1/projects', ProjectsHandler, all_resources),
    ('/v1/projects/([a-z0-9-]+)', ProjectHandler, all_resources),
    ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)', ServiceHandler,
     all_resources),
    ('/v1/apps/([a-z0-9-]+)/services/([a-z0-9-]+)/versions/([a-z0-9-]+)',
     VersionHandler, all_resources),
    ('/v1/apps/([a-z0-9-]+)/operations/([a-z0-9-]+)', OperationsHandler,
     {'ua_client': ua_client}),
    ('/api/cron/update', UpdateCronHandler,
     {'acc': acc, 'zk_client': zk_client, 'ua_client': ua_client}),
    ('/api/queue/update', UpdateQueuesHandler,
     {'zk_client': zk_client, 'ua_client': ua_client})
  ])
  logger.info('Starting AdminServer')
  app.listen(args.port)
  io_loop = IOLoop.current()
  io_loop.start()
def get_current():
  """ Method for building an instance of NodeStatsSnapshot.
  It collects information about usage of main resource on the machine.

  Returns:
    An object of NodeStatsSnapshot with detailed explanation of
    resources used on the machine
  """
  # Use UTC so the field matches its name; this also makes the timestamp
  # consistent with ProcessesStatsSource.get_current, which uses
  # datetime.utcnow(). (The previous datetime.now() produced a local-time
  # value in a field named utc_timestamp.)
  utc_timestamp = time.mktime(datetime.utcnow().timetuple())
  start = time.time()
  private_ip = appscale_info.get_private_ip()

  # CPU usage
  cpu_times = psutil.cpu_times()
  cpu = NodeCPU(
    user=cpu_times.user,
    system=cpu_times.system,
    idle=cpu_times.idle,
    percent=psutil.cpu_percent(),
    count=psutil.cpu_count()
  )

  # AvgLoad
  loadavg = NodeLoadAvg(*os.getloadavg())

  # Memory usage
  virtual = psutil.virtual_memory()
  memory = NodeMemory(
    total=virtual.total, available=virtual.available, used=virtual.used)

  # Swap usage
  swap_mem = psutil.swap_memory()
  swap = NodeSwap(
    total=swap_mem.total,
    free=swap_mem.free,
    used=swap_mem.used
  )

  # Disk usage
  partitions = psutil.disk_partitions()
  partitions_dict = {}
  for part in partitions:
    usage = psutil.disk_usage(part.mountpoint)
    partitions_dict[part.mountpoint] = NodePartition(
      total=usage.total, used=usage.used, free=usage.free
    )
  io_counters = psutil.disk_io_counters()
  disk_io = NodeDiskIO(
    read_count=io_counters.read_count, write_count=io_counters.write_count,
    read_bytes=io_counters.read_bytes, write_bytes=io_counters.write_bytes,
    read_time=io_counters.read_time, write_time=io_counters.write_time
  )

  # Network usage
  network_io = psutil.net_io_counters()
  network = NodeNetwork(
    bytes_sent=network_io.bytes_sent, bytes_recv=network_io.bytes_recv,
    packets_sent=network_io.packets_sent,
    packets_recv=network_io.packets_recv,
    errin=network_io.errin, errout=network_io.errout,
    dropin=network_io.dropin,
    dropout=network_io.dropout,
    connections_num=len(psutil.net_connections())
  )

  stats = NodeStatsSnapshot(
    utc_timestamp=utc_timestamp,
    private_ip=private_ip,
    cpu=cpu,
    memory=memory,
    swap=swap,
    disk_io=disk_io,
    partitions_dict=partitions_dict,
    network=network,
    loadavg=loadavg
  )
  logger.info("Prepared local node stats in {elapsed:.1f}s."
              .format(elapsed=time.time()-start))
  return stats
  # NOTE(review): this loop is the tail of a configuration-validation helper
  # whose `def` line is not visible in this chunk; `config` and the return
  # statements belong to that enclosing definition.
  for ii in REQUIRED_CONFIG_FIELDS:
    try:
      # Access the field only to verify it is present; the value is unused.
      if config[ii]:
        pass
    except KeyError:
      logging.error("Unable to find " + str(ii) + " in configuration")
      return False
  return True

################################
# MAIN
################################
if __name__ == "__main__":
  # AppManager SOAP entry point: expose start/stop functions on the private
  # IP and serve forever, ignoring transient SSL errors.
  file_io.set_logging_format()

  deployment_config = DeploymentConfig(
    appscale_info.get_zk_locations_string())

  INTERNAL_IP = appscale_info.get_private_ip()
  SERVER = SOAPpy.SOAPServer((INTERNAL_IP, constants.APP_MANAGER_PORT))

  SERVER.registerFunction(start_app)
  SERVER.registerFunction(stop_app)
  SERVER.registerFunction(stop_app_instance)

  while 1:
    try:
      SERVER.serve_forever()
    except SSL.SSLError:
      # Best-effort: a failed SSL handshake should not kill the server loop.
      pass
################################ if __name__ == "__main__": file_io.set_logging_format() logging.getLogger().setLevel(logging.INFO) zk_ips = appscale_info.get_zk_node_ips() zk_client = KazooClient(hosts=','.join(zk_ips)) zk_client.start() deployment_config = DeploymentConfig(zk_client) projects_manager = GlobalProjectsManager(zk_client) thread_pool = ThreadPoolExecutor(MAX_BACKGROUND_WORKERS) source_manager = SourceManager(zk_client, thread_pool) source_manager.configure_automatic_fetch(projects_manager) options.define('private_ip', appscale_info.get_private_ip()) options.define('syslog_server', appscale_info.get_headnode_ip()) options.define('db_proxy', appscale_info.get_db_proxy()) options.define('tq_proxy', appscale_info.get_tq_proxy()) options.define('secret', appscale_info.get_secret()) running_instances = recover_state(zk_client) PeriodicCallback(stop_failed_instances, INSTANCE_CLEANUP_INTERVAL * 1000).start() app = tornado.web.Application([('/versions/([a-z0-9-_]+)', VersionHandler), ('/versions/([a-z0-9-_]+)/([0-9-]+)', InstanceHandler)]) app.listen(constants.APP_MANAGER_PORT) logging.info('Starting AppManager on {}'.format(