def terminate_virtualized_cluster(cls, keyname, is_verbose):
  """Stops all API services running on all nodes in the currently running
  AppScale deployment.

  Args:
    keyname: The name of the SSH keypair used for this AppScale deployment.
    is_verbose: A bool that indicates if we should print the commands executed
      to stdout.
  Raises:
    AppScaleException: If the secret key cannot be found, or if the services
      could not be confirmed down on every machine.
  """
  AppScaleLogger.log(
    "Terminating appscale deployment with keyname {0}".format(keyname))
  time.sleep(2)

  shadow_host = LocalState.get_host_with_role(keyname, 'shadow')
  try:
    secret = LocalState.get_secret_key(keyname)
  except IOError:
    # We couldn't find the secret key: AppScale is most likely not running.
    raise AppScaleException("Couldn't find AppScale secret key.")

  acc = AppControllerClient(shadow_host, secret)
  try:
    all_ips = acc.get_all_public_ips()
  except Exception as exception:
    # The shadow node may already be down; fall back to the locally cached
    # list of machines.
    AppScaleLogger.warn(
      'Saw Exception while getting deployments IPs {0}'.format(
        str(exception)))
    all_ips = LocalState.get_all_public_ips(keyname)

  # Stop the AppController on every machine in parallel.
  threads = []
  for ip in all_ips:
    thread = threading.Thread(target=cls.stop_remote_appcontroller,
                              args=(ip, keyname, is_verbose))
    thread.start()
    threads.append(thread)
  for thread in threads:
    thread.join()

  boxes_shut_down = 0
  is_running_regex = re.compile("appscale-controller stop")
  for ip in all_ips:
    AppScaleLogger.log(
      "Shutting down AppScale API services at {0}".format(ip))
    while True:
      remote_output = cls.ssh(ip, keyname, 'ps x', is_verbose)
      AppScaleLogger.verbose(remote_output, is_verbose)
      # BUG FIX: re.match() only matches at the very start of the string, and
      # `ps x` output begins with a header line, so the old `.match()` call
      # never succeeded and the poll loop exited immediately without waiting.
      # re.search() scans the whole output for the stop command.
      if not is_running_regex.search(remote_output):
        break
      time.sleep(0.3)
    boxes_shut_down += 1

  if boxes_shut_down != len(all_ips):
    raise AppScaleException(
      "Couldn't terminate your AppScale deployment on"
      " all machines - please do so manually.")

  AppScaleLogger.log(
    "Terminated AppScale on {0} machines.".format(boxes_shut_down))
def confirm_or_abort(cls, message):
  """Warns the user with `message` and asks for explicit confirmation.

  Args:
    message: A str, the warning to display before prompting.
  Raises:
    AppScaleException: If the user does not answer yes.
  """
  AppScaleLogger.warn(message)
  confirm = raw_input("Are you sure you want to do this? (Y/N) ")
  # Anything other than an explicit yes aborts.
  if confirm.lower() not in ("y", "yes"):
    raise AppScaleException("AppScale termination was cancelled.")
def confirm_or_abort(cls, message):
  """Displays a warning and aborts unless the user explicitly agrees.

  Args:
    message: A str shown to the user before the confirmation prompt.
  Raises:
    AppScaleException: If the user answers anything but yes.
  """
  AppScaleLogger.warn(message)
  answer = raw_input("Are you sure you want to do this? (Y/N) ").lower()
  if answer == 'y' or answer == 'yes':
    return
  raise AppScaleException('AppScale termination was cancelled.')
def logs(self, location):
  """'logs' provides a cleaner experience for users than the
  appscale-gather-logs command, by using the configuration options present in
  the AppScalefile found in the current working directory.

  Args:
    location: The path on the local filesystem where logs should be copied
      to.
  Raises:
    AppScalefileException: If there is no AppScalefile in the current working
      directory.
  """
  config = yaml.safe_load(self.read_appscalefile())

  # Build the argument vector for appscale-gather-logs from the AppScalefile.
  command = []
  if 'keyname' in config:
    command.extend(["--keyname", config["keyname"]])
  command.extend(["--location", location])

  # Parse the arguments and hand off to the tools implementation, reporting
  # (not raising) any failure.
  options = ParseArgs(command, "appscale-gather-logs").args
  try:
    AppScaleTools.gather_logs(options)
  except Exception as e:
    AppScaleLogger.warn(str(e))
def shut_down_appscale_if_running(cls, options):
  """ Checks if AppScale is running and shuts it down as this is an offline
  upgrade.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  Raises:
    AppScaleException: If the user declines either confirmation prompt.
  """
  deployment_running = os.path.exists(
    LocalState.get_secret_key_location(options.keyname))

  if not deployment_running:
    # No secret key on disk: AppScale is not up, only confirm the upgrade.
    AppScaleLogger.warn(
      "Upgrade process could take a while and it is not reversible.")
    if options.test:
      return
    response = raw_input(
      'Are you sure you want to proceed with the upgrade? (y/N) ')
    if response.lower() not in ('y', 'yes'):
      raise AppScaleException("Cancelled AppScale upgrade.")
    return

  # The deployment is running: confirm (unless in test mode), then stop it.
  AppScaleLogger.warn(
    "AppScale needs to be down for this upgrade. "
    "Upgrade process could take a while and it is not reversible.")
  if not options.test:
    response = raw_input(
      'Are you sure you want to proceed with shutting down AppScale to '
      'continue the upgrade? (y/N) ')
    if response.lower() not in ('y', 'yes'):
      raise AppScaleException("Cancelled AppScale upgrade.")
  AppScaleLogger.log("Shutting down AppScale...")
  cls.terminate_instances(options)
def shut_down_appscale_if_running(cls, options):
  """ Checks if AppScale is running and shuts it down as this is an offline
  upgrade.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  Raises:
    AppScaleException: If the user cancels at either prompt.
  """
  key_location = LocalState.get_secret_key_location(options.keyname)
  if os.path.exists(key_location):
    # A secret key on disk means a deployment is (or was) running; it must be
    # stopped before upgrading.
    AppScaleLogger.warn(
      "AppScale needs to be down for this upgrade. "
      "Upgrade process could take a while and it is not reversible.")
    if not options.test:
      answer = raw_input(
        "Are you sure you want to proceed with shutting down AppScale to "
        "continue the upgrade? (y/N) ").lower()
      if answer not in ["y", "yes"]:
        raise AppScaleException("Cancelled AppScale upgrade.")
    AppScaleLogger.log("Shutting down AppScale...")
    cls.terminate_instances(options)
  else:
    AppScaleLogger.warn(
      "Upgrade process could take a while and it is not reversible.")
    if options.test:
      return
    answer = raw_input(
      "Are you sure you want to proceed with the upgrade? (y/N) ").lower()
    if answer not in ["y", "yes"]:
      raise AppScaleException("Cancelled AppScale upgrade.")
def destroy(self):
  """'destroy' provides a nicer experience for users than the
  appscale-terminate-instances command, by using the configuration options
  present in the AppScalefile found in the current working directory.

  Raises:
    AppScalefileException: If there is no AppScalefile in the current working
      directory.
  """
  config = yaml.safe_load(self.read_appscalefile())

  # Build a terminate-instances argument vector from the AppScalefile.
  command = []
  if 'keyname' in config:
    command.extend(["--keyname", config['keyname']])
  if 'verbose' in config:
    command.append("--verbose")

  # Don't worry about validating the command here -
  # appscale-terminate-instances will do that for us.
  options = ParseArgs(command, "appscale-terminate-instances").args
  try:
    AppScaleTools.terminate_instances(options)
  except Exception as e:
    AppScaleLogger.warn(str(e))
def describe_instances(cls, options):
  """Queries each node in the currently running AppScale deployment and
  reports on their status.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  """
  login_host = LocalState.get_login_host(options.keyname)
  login_acc = AppControllerClient(
    login_host, LocalState.get_secret_key(options.keyname))

  # Ask the login node for the full machine list, then query each machine's
  # own AppController for its status.
  for ip in login_acc.get_all_public_ips():
    acc = AppControllerClient(ip, LocalState.get_secret_key(options.keyname))
    AppScaleLogger.log("Status of node at {0}:".format(ip))
    try:
      AppScaleLogger.log(acc.get_status())
    except Exception as exception:
      # A node can be unreachable without failing the whole report.
      AppScaleLogger.warn(
        "Unable to contact machine: {0}\n".format(str(exception)))

  AppScaleLogger.success(
    "View status information about your AppScale "
    "deployment at http://{0}:{1}/status".format(
      login_host, RemoteHelper.APP_DASHBOARD_PORT))
def expand_roles(self):
  """Converts the 'master' composite role into the roles it represents, and
  adds dependencies necessary for the 'login' and 'database' roles.
  """
  # BUG FIX: the original indexed self.roles with range(len(...)) while
  # calling remove()/append() on it, which shifts later elements into
  # already-visited slots and skips the element that follows each deprecated
  # role. Iterating over a snapshot makes the pass see every original role.
  for role in list(self.roles):
    if role in NodeLayout.DEPRECATED_ROLES:
      AppScaleLogger.warn("'{}' role has been deprecated, please use '{}'"
        .format(role, NodeLayout.DEPRECATED_ROLES[role]))
      self.roles.remove(role)
      self.roles.append(NodeLayout.DEPRECATED_ROLES[role])

  if 'master' in self.roles:
    self.roles.remove('master')
    self.roles.append('shadow')
    self.roles.append('load_balancer')

  if 'login' in self.roles:
    self.roles.append('load_balancer')

  # TODO: remove these, db_slave and taskqueue_slave are currently deprecated.
  # Parenthesized: 'and' binds tighter than 'or', so the unparenthesized
  # original evaluated as db_slave or (db_master and not database), adding a
  # duplicate 'database' for db_slave nodes (masked only by the final dedup).
  if ('db_slave' in self.roles or 'db_master' in self.roles) \
      and 'database' not in self.roles:
    self.roles.append('database')

  if ('taskqueue_slave' in self.roles or 'taskqueue_master' in self.roles) \
      and 'taskqueue' not in self.roles:
    self.roles.append('taskqueue')

  # Remove any duplicate roles
  self.roles = list(set(self.roles))
def expand_roles(self):
  """Converts the 'master' composite role into the roles it represents, and
  adds dependencies necessary for the 'login' and 'database' roles.
  """
  # BUG FIX: iterate a copy of self.roles. The previous loop used
  # range(len(self.roles)) and mutated the list in place, so after each
  # remove()/append() pair the element following the deprecated role slid
  # into the current index and was never examined.
  for role in self.roles[:]:
    if role in NodeLayout.DEPRECATED_ROLES:
      AppScaleLogger.warn(
        "'{}' role has been deprecated, please use '{}'".format(
          role, NodeLayout.DEPRECATED_ROLES[role]))
      self.roles.remove(role)
      self.roles.append(NodeLayout.DEPRECATED_ROLES[role])

  if 'master' in self.roles:
    self.roles.remove('master')
    self.roles.append('shadow')
    self.roles.append('load_balancer')

  if 'login' in self.roles:
    self.roles.append('load_balancer')

  # TODO: remove these, db_slave and taskqueue_slave are currently deprecated.
  # BUG FIX: parentheses added — without them, 'and' outranks 'or' and the
  # 'database' / 'taskqueue' presence checks only guarded the *_master case.
  if ('db_slave' in self.roles or 'db_master' in self.roles) \
      and 'database' not in self.roles:
    self.roles.append('database')

  if ('taskqueue_slave' in self.roles or 'taskqueue_master' in self.roles) \
      and 'taskqueue' not in self.roles:
    self.roles.append('taskqueue')

  # Remove any duplicate roles
  self.roles = list(set(self.roles))
def terminate_virtualized_cluster(cls, keyname, is_verbose):
  """Stops all API services running on all nodes in the currently running
  AppScale deployment.

  Args:
    keyname: The name of the SSH keypair used for this AppScale deployment.
    is_verbose: A bool that indicates if we should print the commands executed
      to stdout.
  Raises:
    AppScaleException: If no secret key is found, or if the services could
      not be confirmed down on every machine.
  """
  AppScaleLogger.log("Terminating appscale deployment with keyname {0}"
                     .format(keyname))
  time.sleep(2)

  shadow_host = LocalState.get_host_with_role(keyname, 'shadow')
  try:
    secret = LocalState.get_secret_key(keyname)
  except IOError:
    # We couldn't find the secret key: AppScale is most likely not
    # running.
    raise AppScaleException("Couldn't find AppScale secret key.")

  acc = AppControllerClient(shadow_host, secret)
  try:
    all_ips = acc.get_all_public_ips()
  except Exception as exception:
    # Shadow node unreachable - use the locally cached machine list instead.
    AppScaleLogger.warn('Saw Exception while getting deployments IPs {0}'.
                        format(str(exception)))
    all_ips = LocalState.get_all_public_ips(keyname)

  # Stop the AppController on all machines concurrently.
  threads = []
  for ip in all_ips:
    thread = threading.Thread(target=cls.stop_remote_appcontroller,
                              args=(ip, keyname, is_verbose))
    thread.start()
    threads.append(thread)
  for thread in threads:
    thread.join()

  boxes_shut_down = 0
  is_running_regex = re.compile("appscale-controller stop")
  for ip in all_ips:
    AppScaleLogger.log("Shutting down AppScale API services at {0}".
                       format(ip))
    while True:
      remote_output = cls.ssh(ip, keyname, 'ps x', is_verbose)
      AppScaleLogger.verbose(remote_output, is_verbose)
      # BUG FIX: was is_running_regex.match(), which only tests the start of
      # the `ps x` output (a header line) and therefore never matched; the
      # loop broke on its first pass instead of waiting for shutdown.
      if not is_running_regex.search(remote_output):
        break
      time.sleep(0.3)
    boxes_shut_down += 1

  if boxes_shut_down != len(all_ips):
    raise AppScaleException("Couldn't terminate your AppScale deployment on"
                            " all machines - please do so manually.")

  AppScaleLogger.log("Terminated AppScale on {0} machines.".
                     format(boxes_shut_down))
def remove_app(cls, options):
  """Instructs AppScale to no longer host the named application.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  Raises:
    AppScaleException: If the user cancels, the app is not running, or the
      app's serving port cannot be determined.
  """
  # Ask for confirmation unless --confirm was passed.
  if not options.confirm:
    response = raw_input(
      'Are you sure you want to remove this application? (y/N) ')
    if response.lower() not in ['y', 'yes']:
      raise AppScaleException("Cancelled application removal.")

  login_host = LocalState.get_login_host(options.keyname)
  secret = LocalState.get_secret_key(options.keyname)
  acc = AppControllerClient(login_host, secret)

  if not acc.is_app_running(options.appname):
    raise AppScaleException("The given application is not currently running.")

  # Makes a call to the AppController to get all the stats and looks
  # through them for the http port the app can be reached on.
  http_port = None
  for _ in range(cls.MAX_RETRIES + 1):
    result = acc.get_all_stats()
    try:
      json_result = json.loads(result)
      apps_result = json_result['apps']
      current_app = apps_result[options.appname]
      http_port = current_app['http']
      if http_port:
        break
      time.sleep(cls.SLEEP_TIME)
    except (KeyError, ValueError):
      # Stats may not include the app (KeyError) or may not be valid JSON
      # yet (ValueError); wait and retry.
      AppScaleLogger.verbose("Got json error from get_all_data result.",
                             options.verbose)
      time.sleep(cls.SLEEP_TIME)
  if not http_port:
    raise AppScaleException(
      "Unable to get the serving port for the application.")

  acc.stop_app(options.appname)

  # Poll the app's serving port until it closes, up to MAX_RETRIES + 1 times.
  AppScaleLogger.log("Please wait for your app to shut down.")
  for _ in range(cls.MAX_RETRIES + 1):
    if RemoteHelper.is_port_open(login_host, http_port, options.verbose):
      time.sleep(cls.SLEEP_TIME)
      AppScaleLogger.log("Waiting for {0} to terminate...".format(
        options.appname))
    else:
      AppScaleLogger.success("Done shutting down {0}.".format(
        options.appname))
      return
  # Port still open after all retries: warn rather than fail.
  AppScaleLogger.warn("App {0} may still be running.".format(
    options.appname))
def run_bootstrap(cls, ip, options, error_ips):
  """Runs the bootstrap command on one machine, recording failures.

  Args:
    ip: A str, the address of the machine to bootstrap over SSH.
    options: A Namespace with keyname and verbose fields.
    error_ips: A list collecting the addresses that failed to bootstrap.
  Returns:
    error_ips, with `ip` appended if the bootstrap command failed.
  """
  try:
    RemoteHelper.ssh(ip, options.keyname, cls.BOOTSTRAP_CMD, options.verbose)
    AppScaleLogger.success(
      'Successfully updated and built AppScale on {}'.format(ip))
  except ShellException:
    # Record the failure and point the operator at the remote log.
    error_ips.append(ip)
    AppScaleLogger.warn(
      'Unable to upgrade AppScale code on {}.\n'
      'Please correct any errors listed in /var/log/appscale/bootstrap.log '
      'on that machine and re-run appscale upgrade.'.format(ip))
  return error_ips
def gather_logs(cls, options): """Collects logs from each machine in the currently running AppScale deployment. Args: options: A Namespace that has fields for each parameter that can be passed in via the command-line interface. """ # First, make sure that the place we want to store logs doesn't # already exist. if os.path.exists(options.location): raise AppScaleException("Can't gather logs, as the location you " + \ "specified, {0}, already exists.".format(options.location)) acc = AppControllerClient(LocalState.get_login_host(options.keyname), LocalState.get_secret_key(options.keyname)) try: all_ips = acc.get_all_public_ips() except socket.error: # Occurs when the AppController has failed. AppScaleLogger.warn("Couldn't get an up-to-date listing of the " + \ "machines in this AppScale deployment. Using our locally cached " + \ "info instead.") all_ips = LocalState.get_all_public_ips(options.keyname) # do the mkdir after we get the secret key, so that a bad keyname will # cause the tool to crash and not create this directory os.mkdir(options.location) for ip in all_ips: # Get the logs from each node, and store them in our local directory local_dir = "{0}/{1}".format(options.location, ip) os.mkdir(local_dir) RemoteHelper.scp_remote_to_local(ip, options.keyname, '/var/log/appscale', local_dir, options.verbose) try: RemoteHelper.scp_remote_to_local(ip, options.keyname, '/var/log/cassandra', local_dir, options.verbose) except ShellException: pass try: RemoteHelper.scp_remote_to_local(ip, options.keyname, '/var/log/zookeeper', local_dir, options.verbose) except ShellException: pass RemoteHelper.scp_remote_to_local(ip, options.keyname, '/var/log/kern.log', local_dir, options.verbose) RemoteHelper.scp_remote_to_local(ip, options.keyname, '/var/log/syslog', local_dir, options.verbose) AppScaleLogger.success("Successfully copied logs to {0}".format( options.location))
def run_bootstrap(cls, ip, options, error_ips):
  """Executes the bootstrap command on a single node via SSH.

  Args:
    ip: A str naming the machine to upgrade.
    options: A Namespace carrying keyname and verbose settings.
    error_ips: A list of addresses whose bootstrap attempts failed so far.
  Returns:
    The error_ips list; `ip` is appended to it when the SSH command fails.
  """
  failure_notice = (
    "Unable to upgrade AppScale code on {}.\n"
    "Please correct any errors listed in /var/log/appscale/bootstrap.log "
    "on that machine and re-run appscale upgrade.")
  try:
    RemoteHelper.ssh(ip, options.keyname, cls.BOOTSTRAP_CMD, options.verbose)
    AppScaleLogger.success(
      "Successfully updated and built AppScale on {}".format(ip))
  except ShellException:
    error_ips.append(ip)
    AppScaleLogger.warn(failure_notice.format(ip))
  return error_ips
def publish_api_list(cls, api_list, url, keyname):
  """Publishes a list of APIs to the EAGER API store.

  Args:
    api_list: A list of API objects exposing a to_dict() serializer.
    url: The URL to publish the APIs under.
    keyname: The name of the SSH keypair for the running deployment.
  """
  eager = EagerClient(LocalState.get_login_host(keyname),
                      LocalState.get_secret_key(keyname))
  # Serialize each API before shipping it to the store.
  serialized_apis = [api.to_dict() for api in api_list]
  result = eager.publish_api_list(serialized_apis, url)
  if result['success']:
    AppScaleLogger.log('{0} APIs published to API store.'.format(
      len(api_list)))
  else:
    # Surface the failure reason, plus any extra detail the server returned.
    AppScaleLogger.warn(result['reason'])
    if result.get('detail'):
      AppScaleLogger.warn(str(result['detail']))
def generate_crash_log(cls, exception, stacktrace):
  """Writes information to the local filesystem about an uncaught exception
  that killed an AppScale Tool's execution, to aid in debugging at a later
  time.

  Args:
    exception: The Exception that crashed executing an AppScale Tool, whose
      information we want to log for debugging purposes.
    stacktrace: A str that contains the newline-separated stacktrace
      corresponding to the given exception.
  Returns:
    The location on the filesystem where the crash log was written to.
  """
  crash_log_filename = '{0}log-{1}'.format(
    LocalState.LOCAL_APPSCALE_PATH, uuid.uuid4())

  # Locale detection can itself fail; fall back to a placeholder.
  try:
    locale.setlocale(locale.LC_ALL, '')
    this_locale = locale.getlocale()[0]
  except locale.Error:
    this_locale = "unknown"

  log_info = {
    # System-specific information
    'platform' : platform.platform(),
    'runtime' : platform.python_implementation(),
    'locale' : this_locale,

    # Crash-specific information
    'exception' : exception.__class__.__name__,
    'message' : str(exception),
    'stacktrace' : stacktrace.rstrip(),

    # AppScale Tools-specific information
    'tools_version' : APPSCALE_VERSION
  }

  # If LOCAL_APPSCALE_PATH doesn't exist, create it so that we can write the
  # crash log.
  if not os.path.exists(LocalState.LOCAL_APPSCALE_PATH):
    os.mkdir(LocalState.LOCAL_APPSCALE_PATH)

  with open(crash_log_filename, 'w') as file_handle:
    file_handle.writelines(
      "{0} : {1}\n\n".format(key, value)
      for key, value in log_info.iteritems())

  AppScaleLogger.warn(str(exception))
  AppScaleLogger.log("\nA log with more information is available " \
    "at\n{0}.".format(crash_log_filename))
  return crash_log_filename
def confirm_or_abort(cls, message):
  """ Displays confirmation message and collects user's choice.

  Args:
    message: A str, the message to be displayed.
  Raises:
    AppScaleException: If the user chooses to terminate AppScale.
  """
  AppScaleLogger.warn(message)
  reply = raw_input("Are you sure you want to do this? (Y/N) ").lower()
  # Only an explicit yes lets execution continue.
  if reply not in ('y', 'yes'):
    raise AppScaleException('AppScale termination was cancelled.')
def confirm_or_abort(cls, message):
  """ Displays confirmation message and collects user's choice.

  Args:
    message: A str, the message to be displayed.
  Raises:
    AppScaleException: If the user chooses to terminate AppScale.
  """
  AppScaleLogger.warn(message)
  confirm = raw_input("Are you sure you want to do this? (Y/N) ")
  if confirm.lower() == 'y':
    return
  if confirm.lower() == 'yes':
    return
  raise AppScaleException('AppScale termination was cancelled.')
def generate_crash_log(cls, exception, stacktrace):
  """Persists details of an uncaught exception for later debugging.

  Args:
    exception: The Exception that crashed executing an AppScale Tool, whose
      information we want to log for debugging purposes.
    stacktrace: A str that contains the newline-separated stacktrace
      corresponding to the given exception.
  Returns:
    The location on the filesystem where the crash log was written to.
  """
  crash_log_filename = '{0}log-{1}'.format(
    LocalState.LOCAL_APPSCALE_PATH, uuid.uuid4())

  try:
    locale.setlocale(locale.LC_ALL, '')
    this_locale = locale.getlocale()[0]
  except locale.Error:
    # Locale lookup itself failed; record a placeholder instead.
    this_locale = "unknown"

  log_info = {
    # System-specific information
    'platform': platform.platform(),
    'runtime': platform.python_implementation(),
    'locale': this_locale,

    # Crash-specific information
    'exception': exception.__class__.__name__,
    'message': str(exception),
    'stacktrace': stacktrace.rstrip(),

    # AppScale Tools-specific information
    'tools_version': APPSCALE_VERSION
  }

  # Create the AppScale config directory on first use so the log can be
  # written.
  if not os.path.exists(LocalState.LOCAL_APPSCALE_PATH):
    os.mkdir(LocalState.LOCAL_APPSCALE_PATH)

  with open(crash_log_filename, 'w') as handle:
    for key, value in log_info.iteritems():
      handle.write("{0} : {1}\n\n".format(key, value))

  AppScaleLogger.warn(str(exception))
  AppScaleLogger.log("\nA log with more information is available " \
    "at\n{0}.".format(crash_log_filename))
  return crash_log_filename
def __init__(self, options):
  """Creates a new NodeLayout from the given YAML file.

  Args:
    options: A Namespace or dict that (optionally) contains a field
      containing the YAML representing the placement strategy to use for
      this AppScale deployment. This YAML can be either raw YAML, or
      a str containing a path on the local filesystem that, when read,
      contains the YAML in question. It can also be set to None, for
      deployments when the user specifies how many VMs they wish to use.
  Raises:
    BadConfigurationException if configuration is not valid.
  """
  # Normalize a Namespace into a plain dict so .get() works uniformly below.
  if not isinstance(options, dict):
    options = vars(options)
  self.master = None

  # 'ips' may be a path to a YAML file, an already-parsed dict (deprecated
  # layout), an already-parsed list, or absent.
  input_yaml = options.get('ips')
  if isinstance(input_yaml, str):
    with open(input_yaml, 'r') as file_handle:
      self.input_yaml = yaml.safe_load(file_handle.read())
  elif isinstance(input_yaml, dict):
    self.input_yaml = input_yaml
    AppScaleLogger.warn(
      "The AppScalefile is changing, the layout you are "
      "using will be invalid soon. Please see {} for more details.".
      format(self.APPSCALEFILE_INSTRUCTIONS))
  elif isinstance(input_yaml, list):
    self.input_yaml = input_yaml
  else:
    self.input_yaml = None

  # Deployment-wide settings pulled straight from the options.
  self.disks = options.get('disks')
  self.infrastructure = options.get('infrastructure')
  self.min_machines = options.get('min_machines')
  self.max_machines = options.get('max_machines')
  self.replication = options.get('replication')
  self.database_type = options.get('table', 'cassandra')  # default database
  self.add_to_existing = options.get('add_to_existing')
  self.default_instance_type = options.get('instance_type')
  self.test = options.get('test')
  self.force = options.get('force')

  if 'login_host' in options and options['login_host'] is not None:
    self.login_host = options['login_host']
  else:
    self.login_host = None

  self.nodes = []
  # Validates the assembled layout; may raise BadConfigurationException.
  self.validate_node_layout()
def remove_app(cls, options):
  """Instructs AppScale to no longer host the named application.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  Raises:
    AppScaleException: If the user cancels, the app is not running, or the
      app's serving port cannot be determined.
  """
  # Ask for confirmation unless --confirm was passed.
  if not options.confirm:
    response = raw_input(
      "Are you sure you want to remove this application? (y/N) ")
    if response.lower() not in ["y", "yes"]:
      raise AppScaleException("Cancelled application removal.")

  login_host = LocalState.get_login_host(options.keyname)
  secret = LocalState.get_secret_key(options.keyname)
  acc = AppControllerClient(login_host, secret)

  if not acc.is_app_running(options.appname):
    raise AppScaleException("The given application is not currently running.")

  # Makes a call to the AppController to get all the stats and looks
  # through them for the http port the app can be reached on.
  http_port = None
  for _ in range(cls.MAX_RETRIES + 1):
    result = acc.get_all_stats()
    try:
      json_result = json.loads(result)
      apps_result = json_result["apps"]
      current_app = apps_result[options.appname]
      http_port = current_app["http"]
      if http_port:
        break
      time.sleep(cls.SLEEP_TIME)
    except (KeyError, ValueError):
      # Stats may not mention the app yet (KeyError) or may not be valid
      # JSON (ValueError); wait and retry.
      AppScaleLogger.verbose("Got json error from get_all_data result.",
                             options.verbose)
      time.sleep(cls.SLEEP_TIME)
  if not http_port:
    raise AppScaleException(
      "Unable to get the serving port for the application.")

  acc.stop_app(options.appname)

  # Poll the serving port until it closes, up to MAX_RETRIES + 1 attempts.
  AppScaleLogger.log("Please wait for your app to shut down.")
  for _ in range(cls.MAX_RETRIES + 1):
    if RemoteHelper.is_port_open(login_host, http_port, options.verbose):
      time.sleep(cls.SLEEP_TIME)
      AppScaleLogger.log("Waiting for {0} to terminate...".format(
        options.appname))
    else:
      AppScaleLogger.success("Done shutting down {0}.".format(
        options.appname))
      return
  # Port still open after all retries: warn rather than fail.
  AppScaleLogger.warn("App {0} may still be running.".format(
    options.appname))
def __init__(self, options):
  """Creates a new NodeLayout from the given YAML file.

  Args:
    options: A Namespace or dict that (optionally) contains a field
      containing the YAML representing the placement strategy to use for
      this AppScale deployment. This YAML can be either raw YAML, or
      a str containing a path on the local filesystem that, when read,
      contains the YAML in question. It can also be set to None, for
      deployments when the user specifies how many VMs they wish to use.
  Raises:
    BadConfigurationException if configuration is not valid.
  """
  # Accept either a dict or an argparse Namespace.
  if not isinstance(options, dict):
    options = vars(options)
  self.master = None

  # 'ips' may be a file path (str), a deprecated dict layout, a list layout,
  # or absent entirely.
  input_yaml = options.get('ips')
  if isinstance(input_yaml, str):
    with open(input_yaml, 'r') as file_handle:
      self.input_yaml = yaml.safe_load(file_handle.read())
  elif isinstance(input_yaml, dict):
    self.input_yaml = input_yaml
    AppScaleLogger.warn("The AppScalefile is changing, the layout you are "
      "using will be invalid soon. Please see {} for more details.".format(
      self.APPSCALEFILE_INSTRUCTIONS))
  elif isinstance(input_yaml, list):
    self.input_yaml = input_yaml
  else:
    self.input_yaml = None

  # Deployment-wide settings pulled straight from the options.
  self.disks = options.get('disks')
  self.infrastructure = options.get('infrastructure')
  self.min_machines = options.get('min_machines')
  self.max_machines = options.get('max_machines')
  self.replication = options.get('replication')
  self.database_type = options.get('table', 'cassandra')  # default database
  self.add_to_existing = options.get('add_to_existing')
  self.default_instance_type = options.get('instance_type')
  self.test = options.get('test')
  self.force = options.get('force')

  if 'login_host' in options and options['login_host'] is not None:
    self.login_host = options['login_host']
  else:
    self.login_host = None

  self.nodes = []
  # Validates the assembled layout; may raise BadConfigurationException.
  self.validate_node_layout()
def get_uaserver_host(self, is_verbose):
  """Queries the AppController to see which machine is hosting the
  UserAppServer, and at what IP it can be reached.

  Args:
    is_verbose: A bool that indicates if we should print out the first
      AppController's status when we query it.
  Returns:
    The IP address where a UserAppServer can be located (although it is not
      guaranteed to be running).
  Raises:
    TimeoutException if MAX_RETRIES is attempted with no answer from
      controller.
  """
  last_known_state = None
  retries = 0
  while True:
    try:
      status = self.get_status()
      AppScaleLogger.verbose('Received status from head node: ' + status,
                             is_verbose)
      if status == self.BAD_SECRET_MESSAGE:
        # Wrong secret: retrying cannot help, so fail immediately.
        raise AppControllerException("Could not authenticate successfully" + \
          " to the AppController. You may need to change the keyname in use.")

      # Success case: the status text names the database (UserAppServer) host.
      match = re.search(r'Database is at (.*)', status)
      if match and match.group(1) != 'not-up-yet':
        return match.group(1)
      else:
        # Otherwise, log the deployment state, but only when it changes.
        match = re.search(r'Current State: (.*)', status)
        if match:
          if last_known_state != match.group(1):
            last_known_state = match.group(1)
            AppScaleLogger.log(last_known_state)
        else:
          AppScaleLogger.log('Waiting for AppScale nodes to complete '
                             'the initialization process')
    except (AppControllerException, socket.error) as exception:
      # These are fatal (bad secret / unreachable node); propagate.
      raise exception
    except Exception as exception:
      # Any other failure is treated as transient and retried.
      AppScaleLogger.warn('Saw {0}, waiting a few moments to try again' \
        .format(str(exception)))

    time.sleep(self.WAIT_TIME)
    retries += 1
    if retries >= self.MAX_RETRIES:
      AppScaleLogger.warn("Too many retries to connect to UAServer.")
      raise TimeoutException()
def up(self):
  """Starts an AppScale deployment with the configuration options from the
  AppScalefile in the current directory.

  Raises:
    AppScalefileException: If there is no AppScalefile in the current
      directory.
  """
  contents = self.read_appscalefile()

  # If running in a cluster environment, we first need to set up SSH keys
  contents_as_yaml = yaml.safe_load(contents)
  if "ips_layout" in contents_as_yaml:
    ips_layout = base64.b64encode(yaml.dump(contents_as_yaml["ips_layout"]))

  # NOTE(review): if 'infrastructure' is absent, the branch below uses
  # ips_layout — this assumes cluster AppScalefiles always define
  # 'ips_layout'; otherwise ips_layout would be unbound. Confirm upstream.
  if not "infrastructure" in contents_as_yaml:
    # Only run add-keypair if there is no ssh key present,
    # or if it doesn't log into all the machines specified.
    if not self.valid_ssh_key(contents_as_yaml):
      add_keypair_command = []
      if "keyname" in contents_as_yaml:
        add_keypair_command.append("--keyname")
        add_keypair_command.append(str(contents_as_yaml["keyname"]))
      add_keypair_command.append("--ips_layout")
      add_keypair_command.append(ips_layout)
      options = ParseArgs(add_keypair_command, "appscale-add-keypair").args
      AppScaleTools.add_keypair(options)

  # Construct a run-instances command from the file's contents
  command = []
  for key, value in contents_as_yaml.items():
    if value is True:
      # Boolean flags become bare --flag arguments.
      command.append(str("--%s" % key))
    else:
      if key == "ips_layout":
        # Pass the base64-encoded layout computed above.
        command.append("--ips_layout")
        command.append(ips_layout)
      else:
        command.append(str("--%s" % key))
        command.append(str("%s" % value))

  # Finally, call AppScaleTools.run_instances
  options = ParseArgs(command, "appscale-run-instances").args
  try:
    AppScaleTools.run_instances(options)
  except Exception as e:
    AppScaleLogger.warn(str(e))
def terminate_spawned_instances(cls, spawned_instance_ids, agent, params):
  """ Shuts down instances specified. For use when AppScale has failed to
  start all the instances for the deployment since we do not check or clean
  any local files.

  Args:
    spawned_instance_ids: A list of instance ids we have started that
      should be terminated.
    agent: The agent to call terminate instance with.
    params: Agent parameters.
  """
  # Work on a copy so the caller's params dict is never mutated.
  params_with_ids = params.copy()
  params_with_ids[agent.PARAM_INSTANCE_IDS] = spawned_instance_ids
  try:
    agent.terminate_instances(params_with_ids)
  except (AgentRuntimeException, BotoServerError):
    # Best-effort cleanup: just report the instances we failed to stop.
    AppScaleLogger.warn("AppScale failed to terminate instance(s) with "
                        "id(s): {}".format(spawned_instance_ids))
def get_username_from_stdin(cls, is_admin):
  """Asks the user for the name of the e-mail address that should be made an
  administrator on their AppScale cloud or App Engine application.

  Args:
    is_admin: A bool selecting the admin-specific prompt text.
  Returns:
    A str containing the e-mail address the user typed in.
  """
  # Loop-invariant pieces hoisted out of the prompt loop.
  email_regex = '^.+\\@(\\[?)[a-zA-Z0-9\\-\\.]+\\.([a-zA-Z]{2,3}|[0-9]{1,3})(\\]?)$'
  if is_admin:
    prompt = 'Enter your desired admin e-mail address: '
  else:
    prompt = 'Enter your desired e-mail address: '

  while True:
    username = raw_input(prompt)
    if re.match(email_regex, username):
      return username
    AppScaleLogger.warn('Invalid e-mail address. Please try again.')
def get_username_from_stdin(cls, is_admin):
  """Prompts for an e-mail address until a syntactically valid one is given.

  Args:
    is_admin: A bool; when True the prompt mentions the admin account.
  Returns:
    A str containing the e-mail address the user typed in.
  """
  email_regex = '^.+\\@(\\[?)[a-zA-Z0-9\\-\\.]+\\.([a-zA-Z]{2,3}|[0-9]{1,3})(\\]?)$'
  prompt = ('Enter your desired admin e-mail address: ' if is_admin
            else 'Enter your desired e-mail address: ')
  while True:
    username = raw_input(prompt)
    if not re.match(email_regex, username):
      AppScaleLogger.warn('Invalid e-mail address. Please try again.')
      continue
    return username
def get_password_from_stdin(cls):
  """Asks the user for the password that should be used for their user
  account.

  Returns:
    The password the user typed in, once it passes the length check and is
    confirmed by re-entry. (The original docstring mentions a SHA1-hashed
    value — TODO confirm whether hashing happens here or in the caller.)
  """
  # NOTE(review): the original body was mangled by secret-redaction residue
  # ('******' fused into the getpass calls) and did not parse. The loop below
  # is reconstructed from the prompts and warning messages that survived;
  # confirm against version control.
  while True:
    password = getpass.getpass('Enter new password: ')
    # Enforce the minimum length mentioned by the surviving warning text.
    if len(password) < 6:
      AppScaleLogger.warn('Password must be at least 6 characters long')
      continue
    password_confirmation = getpass.getpass('Confirm password: ')
    if password == password_confirmation:
      return password
    AppScaleLogger.warn('Passwords entered do not match. Please try again.')
def get_password_from_stdin(cls):
  """Asks the user for the password that should be used for their user
  account.

  Returns:
    The password the user typed in, after the length check and a matching
    confirmation entry. (The original docstring mentions a SHA1-hashed
    value — TODO confirm whether hashing happens here or in the caller.)
  """
  # NOTE(review): the original body was corrupted by secret-redaction
  # ('******' fused into the getpass calls) and was not valid Python. This
  # prompt/validate/confirm loop is reconstructed from the surviving prompt
  # and warning strings; verify against version control.
  while True:
    password = getpass.getpass("Enter new password: ")
    if len(password) < 6:
      AppScaleLogger.warn("Password must be at least 6 characters long")
      continue
    password_confirmation = getpass.getpass("Confirm password: ")
    if password == password_confirmation:
      return password
    AppScaleLogger.warn("Passwords entered do not match. Please try again.")
def get_uaserver_host(self, is_verbose):
  """Queries the AppController to see which machine is hosting the
  UserAppServer, and at what IP it can be reached.

  Args:
    is_verbose: A bool that indicates if we should print out the first
      AppController's status when we query it.
  Returns:
    The IP address where a UserAppServer can be located (although it is not
      guaranteed to be running).
  Raises:
    TimeoutException if MAX_RETRIES is attempted with no answer from
      controller.
  """
  last_known_state = None
  # ROBUSTNESS FIX: this variant of the method looped forever on persistent
  # transient errors. Cap the attempts with MAX_RETRIES and raise
  # TimeoutException, matching the sibling implementation of this method.
  retries = 0
  while True:
    try:
      status = self.get_status()
      AppScaleLogger.verbose(
        'Received status from head node: ' + status, is_verbose)
      if status == self.BAD_SECRET_MESSAGE:
        # Wrong secret: retrying cannot help, so fail immediately.
        raise AppControllerException("Could not authenticate successfully" + \
          " to the AppController. You may need to change the keyname in use.")

      # Success: the status text names the database (UserAppServer) host.
      match = re.search(r'Database is at (.*)', status)
      if match and match.group(1) != 'not-up-yet':
        return match.group(1)
      else:
        # Otherwise log the deployment state, but only when it changes.
        match = re.search(r'Current State: (.*)', status)
        if match:
          if last_known_state != match.group(1):
            last_known_state = match.group(1)
            AppScaleLogger.log(last_known_state)
        else:
          AppScaleLogger.log(
            'Waiting for AppScale nodes to complete '
            'the initialization process')
    except (AppControllerException, socket.error) as exception:
      # Fatal conditions (bad secret / unreachable node); propagate.
      raise exception
    except Exception as exception:
      # Anything else is treated as transient and retried.
      AppScaleLogger.warn('Saw {0}, waiting a few moments to try again' \
        .format(str(exception)))

    time.sleep(self.WAIT_TIME)
    retries += 1
    if retries >= self.MAX_RETRIES:
      AppScaleLogger.warn("Too many retries to connect to UAServer.")
      raise TimeoutException()
def enable_root_login(cls, host, keyname, infrastructure, is_verbose):
  """Logs into the named host and alters its ssh configuration to enable
  the root user to directly log in.

  Args:
    host: A str representing the host to enable root logins on.
    keyname: A str representing the name of the SSH keypair to login with.
    infrastructure: A str representing the name of the cloud infrastructure
      we're running on.
    is_verbose: A bool indicating if we should print the command we execute
      to enable root login to stdout.
  Raises:
    ShellException: If the remote copy fails on a non-'euca'
      infrastructure.
  """
  try:
    # Copy the non-root user's authorized_keys into root's, so later
    # operations can SSH in as root directly.
    # NOTE(review): the user value below appears to have been redacted
    # ('******') when this file was captured - confirm the intended login
    # account (likely the image's default non-root user) before shipping.
    cls.ssh(host, keyname, 'sudo cp ~/.ssh/authorized_keys /root/.ssh/',
      is_verbose, user='******')
  except ShellException as exception:
    if infrastructure == 'euca':
      # On Eucalyptus, root login may already be enabled, so a failed copy
      # is treated as benign.
      AppScaleLogger.warn("Couldn't enable root login - it may already " + \
        "be enabled")
    else:
      raise exception
def reset_password(cls, options):
  """Resets a user's password the currently running AppScale deployment.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  """
  # Load the deployment secret and prompt for the account credentials,
  # then hash the new password the same way the UserAppServer expects.
  secret = LocalState.get_secret_key(options.keyname)
  username, password = LocalState.get_credentials(is_admin=False)
  encrypted_password = LocalState.encrypt_password(username, password)

  uac = UserAppClient(LocalState.get_login_host(options.keyname), secret)

  try:
    uac.change_password(username, encrypted_password)
    AppScaleLogger.success("The password was successfully changed for the "
                           "given user.")
  except Exception as exception:
    # Report the failure and exit non-zero so callers/scripts can detect it.
    AppScaleLogger.warn(
        "Could not change the user's password for the " +
        "following reason: {0}".format(str(exception))
    )
    sys.exit(1)
def reset_password(cls, options):
  """Resets a user's password the currently running AppScale deployment.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  """
  # Gather the shared secret and the account credentials, then hash the
  # new password in the form the UserAppServer expects.
  deployment_secret = LocalState.get_secret_key(options.keyname)
  user, new_password = LocalState.get_credentials(is_admin=False)
  hashed_password = LocalState.encrypt_password(user, new_password)

  client = UserAppClient(
    LocalState.get_login_host(options.keyname), deployment_secret)

  try:
    client.change_password(user, hashed_password)
    AppScaleLogger.success("The password was successfully changed for the " \
      "given user.")
  except Exception as exception:
    # Surface the failure and exit non-zero so scripts can detect it.
    AppScaleLogger.warn("Could not change the user's password for the " + \
      "following reason: {0}".format(str(exception)))
    sys.exit(1)
def get_uaserver_host(self, is_verbose):
  """Queries the AppController to see which machine is hosting the
  UserAppServer, and at what IP it can be reached.

  Args:
    is_verbose: A bool that indicates if we should print out the first
      AppController's status when we query it.
  Returns:
    The IP address where a UserAppServer can be located (although it is
      not guaranteed to be running).
  Raises:
    AppControllerException: If the shared secret is rejected by the
      AppController.
  """
  last_known_state = None
  # Poll the AppController indefinitely until its status string reports
  # where the database (and hence the UserAppServer) lives.
  while True:
    try:
      status = self.get_status()
      AppScaleLogger.verbose("Received status from head node: " + status,
                             is_verbose)
      if status == self.BAD_SECRET_MESSAGE:
        raise AppControllerException(
            "Could not authenticate successfully" +
            " to the AppController. You may need to change the keyname in use."
        )
      match = re.search(r"Database is at (.*)", status)
      if match and match.group(1) != "not-up-yet":
        return match.group(1)
      else:
        # Not ready yet - surface the deployment's current state, but only
        # log it when it changes, to avoid spamming the console.
        match = re.search(r"Current State: (.*)", status)
        if match:
          if last_known_state != match.group(1):
            last_known_state = match.group(1)
            AppScaleLogger.log(last_known_state)
        else:
          AppScaleLogger.log("Waiting for AppScale nodes to complete "
                             "the initialization process")
    except AppControllerException as exception:
      # Only authentication failures are fatal in this variant; unlike the
      # sibling definition in this file, socket errors fall through to the
      # generic handler below and are retried.
      raise exception
    except Exception as exception:
      AppScaleLogger.warn("Saw {0}, waiting a few moments to try again".format(str(exception)))
    # Pause between polls. NOTE(review): original indentation was lost in
    # extraction; the sleep is placed at loop level so every iteration
    # waits - confirm against upstream history.
    time.sleep(self.WAIT_TIME)
def describe_instances(cls, options):
  """Queries each node in the currently running AppScale deployment and
  reports on their status.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  """
  head_host = LocalState.get_login_host(options.keyname)
  head_client = AppControllerClient(
    head_host, LocalState.get_secret_key(options.keyname))

  # Ask every node's AppController for its status, one machine at a time.
  for node_ip in head_client.get_all_public_ips():
    node_client = AppControllerClient(
      node_ip, LocalState.get_secret_key(options.keyname))
    AppScaleLogger.log("Status of node at {0}:".format(node_ip))
    try:
      AppScaleLogger.log(node_client.get_status())
    except Exception as exception:
      # An unreachable node is reported but does not abort the sweep.
      AppScaleLogger.warn(
        "Unable to contact machine: {0}\n".format(str(exception)))

  AppScaleLogger.success("View status information about your AppScale " + \
    "deployment at http://{0}/status".format(head_host))
def gather_logs(cls, options): """Collects logs from each machine in the currently running AppScale deployment. Args: options: A Namespace that has fields for each parameter that can be passed in via the command-line interface. """ # First, make sure that the place we want to store logs doesn't # already exist. if os.path.exists(options.location): raise AppScaleException("Can't gather logs, as the location you " + \ "specified, {0}, already exists.".format(options.location)) acc = AppControllerClient(LocalState.get_login_host(options.keyname), LocalState.get_secret_key(options.keyname)) try: all_ips = acc.get_all_public_ips() except socket.error: # Occurs when the AppController has failed. AppScaleLogger.warn("Couldn't get an up-to-date listing of the " + \ "machines in this AppScale deployment. Using our locally cached " + \ "info instead.") all_ips = LocalState.get_all_public_ips(options.keyname) # do the mkdir after we get the secret key, so that a bad keyname will # cause the tool to crash and not create this directory os.mkdir(options.location) for ip in all_ips: # Get the logs from each node, and store them in our local directory local_dir = "{0}/{1}".format(options.location, ip) os.mkdir(local_dir) RemoteHelper.scp_remote_to_local(ip, options.keyname, '/var/log/appscale', local_dir, options.verbose) AppScaleLogger.success("Successfully copied logs to {0}".format( options.location))
def deploy(self, app):
  """'deploy' is a more accessible way to tell an AppScale deployment to run a
  Google App Engine application than 'appscale-upload-app'. It calls that
  command with the configuration options found in the AppScalefile in the
  current working directory.

  Args:
    app: The path (absolute or relative) to the Google App Engine application
      that should be uploaded.
  Raises:
    AppScalefileException: If there is no AppScalefile in the current
      working directory.
  """
  contents = self.read_appscalefile()

  # Construct an upload-app command from the file's contents
  command = []
  contents_as_yaml = yaml.safe_load(contents)
  if 'keyname' in contents_as_yaml:
    command.append("--keyname")
    command.append(contents_as_yaml['keyname'])

  # BUG FIX: the original appended these flags whenever the key was merely
  # present, so an AppScalefile containing 'test: False' still enabled
  # --test. Only pass a boolean flag through when its value is truthy.
  if contents_as_yaml.get('test'):
    command.append("--test")

  if contents_as_yaml.get('verbose'):
    command.append("--verbose")

  command.append("--file")
  command.append(app)

  # Finally, exec the command. Don't worry about validating it -
  # appscale-upload-app will do that for us.
  options = ParseArgs(command, "appscale-upload-app").args

  try:
    AppScaleTools.upload_app(options)
  except Exception as e:
    AppScaleLogger.warn(str(e))
def upload_app(cls, options):
  """Uploads the given App Engine application into AppScale.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  Returns:
    A tuple containing the host and port where the application is serving
      traffic from.
  Raises:
    AppEngineConfigException: If the given file is not a tar.gz file, zip
      file, or directory, or if its configuration is invalid.
    AppScaleException: If the app is owned by another user, or if no
      serving port could be determined for it.
  """
  # Figure out where the app's code lives, unpacking archives if needed.
  if cls.TAR_GZ_REGEX.search(options.file):
    file_location = LocalState.extract_tgz_app_to_dir(options.file,
      options.verbose)
    created_dir = True
  elif cls.ZIP_REGEX.search(options.file):
    file_location = LocalState.extract_zip_app_to_dir(options.file,
      options.verbose)
    created_dir = True
  elif os.path.isdir(options.file):
    file_location = options.file
    created_dir = False
  else:
    raise AppEngineConfigException('{0} is not a tar.gz file, a zip file, ' \
      'or a directory. Please try uploading either a tar.gz file, a zip ' \
      'file, or a directory.'.format(options.file))

  try:
    app_id = AppEngineHelper.get_app_id_from_app_config(file_location)
  except AppEngineConfigException as config_error:
    AppScaleLogger.log(config_error)
    if 'yaml' in str(config_error):
      raise config_error
    # Java App Engine users may have specified their war directory. In that
    # case, just move up one level, back to the app's directory.
    file_location = file_location + os.sep + ".."
    app_id = AppEngineHelper.get_app_id_from_app_config(file_location)

  app_language = AppEngineHelper.get_app_runtime_from_app_config(
    file_location)
  AppEngineHelper.validate_app_id(app_id)

  if app_language == 'java':
    if AppEngineHelper.is_sdk_mismatch(file_location):
      AppScaleLogger.warn('AppScale did not find the correct SDK jar ' +
        'versions in your app. The current supported ' +
        'SDK version is ' + AppEngineHelper.SUPPORTED_SDK_VERSION + '.')

  login_host = LocalState.get_login_host(options.keyname)
  secret_key = LocalState.get_secret_key(options.keyname)
  acc = AppControllerClient(login_host, secret_key)

  # Determine which user is uploading, creating the account if needed.
  if options.test:
    username = LocalState.DEFAULT_USER
  elif options.email:
    username = options.email
  else:
    username = LocalState.get_username_from_stdin(is_admin=False)

  if not acc.does_user_exist(username):
    password = LocalState.get_password_from_stdin()
    RemoteHelper.create_user_accounts(username, password, login_host,
      options.keyname, clear_datastore=False)

  app_exists = acc.does_app_exist(app_id)
  app_admin = acc.get_app_admin(app_id)
  if app_admin is not None and username != app_admin:
    raise AppScaleException("The given user doesn't own this application" + \
      ", so they can't upload an app with that application ID. Please " + \
      "change the application ID and try again.")

  if app_exists:
    AppScaleLogger.log("Uploading new version of app {0}".format(app_id))
  else:
    AppScaleLogger.log("Uploading initial version of app {0}".format(app_id))
    acc.reserve_app_id(username, app_id, app_language)

  # Ignore all .pyc files while tarring.
  if app_language == 'python27':
    AppScaleLogger.log("Ignoring .pyc files")

  remote_file_path = RemoteHelper.copy_app_to_host(file_location,
    options.keyname, options.verbose)

  acc.done_uploading(app_id, remote_file_path)
  acc.update([app_id])

  # now that we've told the AppController to start our app, find out what port
  # the app is running on and wait for it to start serving
  AppScaleLogger.log("Please wait for your app to start serving.")

  if app_exists:
    time.sleep(20)  # give the AppController time to restart the app

  # Makes a call to the AppController to get all the stats and looks
  # through them for the http port the app can be reached on.
  sleep_time = 2 * cls.SLEEP_TIME
  # BUG FIX: the original used 'current_app' as the success sentinel, but
  # current_app is assigned BEFORE the 'http' key lookup - if that lookup
  # raised KeyError, current_app was truthy while http_port was never
  # bound, producing a NameError below. Track http_port itself instead.
  http_port = None
  for i in range(cls.MAX_RETRIES):
    try:
      result = acc.get_all_stats()
      json_result = json.loads(result)
      apps_result = json_result['apps']
      http_port = apps_result[app_id]['http']
      break
    except (ValueError, KeyError):
      # ValueError: stats aren't valid JSON yet; KeyError: the app hasn't
      # been assigned a port yet. Both mean "not ready - retry".
      pass
    AppScaleLogger.verbose("Waiting {0} second(s) for a port to be assigned to {1}".\
      format(sleep_time, app_id), options.verbose)
    time.sleep(sleep_time)

  if http_port is None:
    raise AppScaleException(
      "Unable to get the serving port for the application.")

  RemoteHelper.sleep_until_port_is_open(login_host, http_port,
    options.verbose)

  AppScaleLogger.success("Your app can be reached at the following URL: " +
    "http://{0}:{1}".format(login_host, http_port))

  if created_dir:
    shutil.rmtree(file_location)

  return (login_host, http_port)
def gather_logs(cls, options):
  """Collects logs from each machine in the currently running AppScale
  deployment.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  Raises:
    AppScaleException: If the destination directory already exists.
  """
  # First, make sure that the place we want to store logs doesn't
  # already exist.
  if os.path.exists(options.location):
    raise AppScaleException("Can't gather logs, as the location you " + \
      "specified, {0}, already exists.".format(options.location))

  acc = AppControllerClient(LocalState.get_login_host(options.keyname),
    LocalState.get_secret_key(options.keyname))

  try:
    all_ips = acc.get_all_public_ips()
  except socket.error:  # Occurs when the AppController has failed.
    AppScaleLogger.warn("Couldn't get an up-to-date listing of the " + \
      "machines in this AppScale deployment. Using our locally cached " + \
      "info instead.")
    all_ips = LocalState.get_all_public_ips(options.keyname)

  # do the mkdir after we get the secret key, so that a bad keyname will
  # cause the tool to crash and not create this directory
  os.mkdir(options.location)

  # The log paths that we collect logs from.
  log_paths = [
    '/var/log/appscale',
    '/var/log/kern.log*',
    '/var/log/monit.log*',
    '/var/log/nginx',
    '/var/log/syslog*',
    '/var/log/zookeeper'
  ]

  failures = False
  for ip in all_ips:
    # Get the logs from each node, and store them in our local directory
    local_dir = "{0}/{1}".format(options.location, ip)
    os.mkdir(local_dir)
    for log_path in log_paths:
      try:
        RemoteHelper.scp_remote_to_local(ip, options.keyname, log_path,
          local_dir, options.verbose)
      except ShellException as shell_exception:
        # A missing log path on one host is not fatal: record the failure,
        # keep collecting the remaining paths and hosts.
        failures = True
        AppScaleLogger.warn(
          "Unable to collect logs from '{}' for host '{}'".
          format(log_path, ip))
        AppScaleLogger.verbose(
          "Encountered exception: {}".format(
            str(shell_exception)), options.verbose)

  if failures:
    AppScaleLogger.log(
      "Done copying to {0}. There were "
      "failures while collecting AppScale logs.".format(
        options.location))
  else:
    AppScaleLogger.success(
      "Successfully collected all AppScale logs into "
      "{0}".format(options.location))
def start_head_node(cls, options, my_id, node_layout):
  """Starts the first node in an AppScale deployment and instructs it to start
  API services on its own node, as well as the other nodes in the deployment.

  This includes spawning the first node in the deployment, copying over all
  deployment-specific files to it, and starting its AppController service.

  Args:
    options: A Namespace that includes parameters passed in by the user that
      define non-placement-strategy-related deployment options (e.g., keypair
      names, security group names).
    my_id: A str that is used to uniquely identify this AppScale deployment
      with the remote start application.
    node_layout: A NodeLayout that describes the placement strategy that
      should be used for this AppScale deployment.
  Raises:
    AppControllerException: If the AppController on the head node crashes.
      The message in this exception indicates why the crash occurred.

  NOTE(review): the original docstring claimed this returns the head node's
  public IP and instance ID, but no return statement is present in this
  version - callers should not rely on a return value here.
  """
  secret_key = LocalState.generate_secret_key(options.keyname)
  AppScaleLogger.verbose("Secret key is {0}".
                         format(secret_key), options.verbose)
  head_node = node_layout.head_node().public_ip

  AppScaleLogger.log("Log in to your head node: ssh -i {0} root@{1}".format(
    LocalState.get_key_path_from_name(options.keyname), head_node))

  # Collect any cloud-specific parameters (credentials, spot price, region)
  # that must be forwarded to the AppController.
  additional_params = {}
  if options.infrastructure:
    agent = InfrastructureAgentFactory.create_agent(options.infrastructure)
    params = agent.get_params_from_args(options)
    additional_params = {}

    if agent.PARAM_CREDENTIALS in params:
      additional_params = params[agent.PARAM_CREDENTIALS]

    if options.use_spot_instances:
      additional_params[agent.PARAM_SPOT_PRICE] = \
        str(params[agent.PARAM_SPOT_PRICE])

    if agent.PARAM_REGION in params:
      additional_params[agent.PARAM_REGION] = params[agent.PARAM_REGION]

  time.sleep(10)  # gives machines in cloud extra time to boot up

  cls.copy_deployment_credentials(head_node, options)
  cls.run_user_commands(head_node, options.user_commands, options.keyname,
    options.verbose)
  cls.start_remote_appcontroller(head_node, options.keyname, options.verbose)

  AppScaleLogger.log("Head node successfully initialized at {0}.".
                     format(head_node))
  AppScaleLogger.remote_log_tools_state(
    options, my_id, "started head node", APPSCALE_VERSION)

  # Construct serverside compatible parameters.
  deployment_params = LocalState.generate_deployment_params(
    options, node_layout, additional_params)
  # Obscure credentials before echoing the parameters in verbose mode.
  AppScaleLogger.verbose(str(LocalState.obscure_dict(deployment_params)),
    options.verbose)
  acc = AppControllerClient(head_node, secret_key)

  try:
    acc.set_parameters(node_layout.to_list(), deployment_params)
  except Exception as exception:
    # Setting parameters failed, which usually means the AppController
    # crashed - fetch its crashlog and surface it as the error message.
    AppScaleLogger.warn(u'Saw Exception while setting AC parameters: {0}'.
      format(str(exception)))
    message = RemoteHelper.collect_appcontroller_crashlog(
      head_node, options.keyname, options.verbose)
    raise AppControllerException(message)
def run_instances(cls, options):
  """Starts a new AppScale deployment with the parameters given.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  Raises:
    BadConfigurationException: If the user passes in options that are not
      sufficient to start an AppScale deployment (e.g., running on EC2 but
      not specifying the AMI to use), or if the user provides us
      contradictory options (e.g., running on EC2 but not specifying EC2
      credentials).
  """
  LocalState.make_appscale_directory()
  LocalState.ensure_appscale_isnt_running(options.keyname, options.force)

  if options.infrastructure:
    AppScaleLogger.log("Starting AppScale " + APPSCALE_VERSION +
      " over the " + options.infrastructure + " cloud.")
  else:
    AppScaleLogger.log("Starting AppScale " + APPSCALE_VERSION +
      " over a virtualized cluster.")
  # Unique ID used to correlate this run in remote tools-state logging.
  my_id = str(uuid.uuid4())
  AppScaleLogger.remote_log_tools_state(options, my_id, "started",
    APPSCALE_VERSION)

  node_layout = NodeLayout(options)
  if not node_layout.is_valid():
    raise BadConfigurationException("There were errors with your " + \
      "placement strategy:\n{0}".format(str(node_layout.errors())))

  if not node_layout.is_supported():
    AppScaleLogger.warn("Warning: This deployment strategy is not " + \
      "officially supported.")

  public_ip, instance_id = RemoteHelper.start_head_node(options, my_id,
    node_layout)
  AppScaleLogger.log("\nPlease wait for AppScale to prepare your machines " +
    "for use.")

  # Write our metadata as soon as possible to let users SSH into those
  # machines via 'appscale ssh'
  LocalState.update_local_metadata(options, node_layout, public_ip,
    instance_id)
  RemoteHelper.copy_local_metadata(public_ip, options.keyname,
    options.verbose)

  acc = AppControllerClient(public_ip, LocalState.get_secret_key(
    options.keyname))
  uaserver_host = acc.get_uaserver_host(options.verbose)

  RemoteHelper.sleep_until_port_is_open(uaserver_host, UserAppClient.PORT,
    options.verbose)

  # Update our metadata again so that users can SSH into other boxes that
  # may have been started.
  LocalState.update_local_metadata(options, node_layout, public_ip,
    instance_id)
  RemoteHelper.copy_local_metadata(public_ip, options.keyname,
    options.verbose)
  AppScaleLogger.log("UserAppServer is at {0}".format(uaserver_host))

  uaserver_client = UserAppClient(uaserver_host,
    LocalState.get_secret_key(options.keyname))

  # Pick the admin credentials: explicit flags win, then test defaults,
  # otherwise prompt the user interactively.
  if options.admin_user and options.admin_pass:
    AppScaleLogger.log("Using the provided admin username/password")
    username, password = options.admin_user, options.admin_pass
  elif options.test:
    AppScaleLogger.log("Using default admin username/password")
    username, password = LocalState.DEFAULT_USER, LocalState.DEFAULT_PASSWORD
  else:
    username, password = LocalState.get_credentials()

  RemoteHelper.create_user_accounts(username, password, uaserver_host,
    options.keyname)
  uaserver_client.set_admin_role(username)

  RemoteHelper.wait_for_machines_to_finish_loading(public_ip,
    options.keyname)
  # Finally, update our metadata once we know that all of the machines are
  # up and have started all their API services.
  LocalState.update_local_metadata(options, node_layout, public_ip,
    instance_id)
  RemoteHelper.copy_local_metadata(public_ip, options.keyname,
    options.verbose)

  RemoteHelper.sleep_until_port_is_open(LocalState.get_login_host(
    options.keyname), RemoteHelper.APP_LOAD_BALANCER_PORT, options.verbose)
  AppScaleLogger.success("AppScale successfully started!")
  AppScaleLogger.success("View status information about your AppScale " + \
    "deployment at http://{0}/status".format(LocalState.get_login_host(
    options.keyname)))
  AppScaleLogger.remote_log_tools_state(options, my_id, "finished",
    APPSCALE_VERSION)
def start_all_nodes(cls, options, node_layout):
  """ Starts all nodes in the designated public cloud.

  Args:
    options: A Namespace that includes parameters passed in by the user that
      define non-placement-strategy-related deployment options (e.g., keypair
      names, security group names).
    node_layout: The node layout of the system including roles.
  Returns:
    The node layout (dummy values in non-cloud deployments) corresponding to
      the nodes that were started.
  Raises:
    AppScaleException: If running instances exist but cannot be matched to
      this deployment's keyname.
    BadConfigurationException: If the layout no longer matches the running
      instances.
  """
  agent = InfrastructureAgentFactory.create_agent(options.infrastructure)
  params = agent.get_params_from_args(options)

  # If we have running instances under the current keyname, we try to
  # re-attach to them. If we have issue finding the locations file or the
  # IP of the head node, we throw an exception.
  login_ip = None
  public_ips, private_ips, instance_ids = agent.describe_instances(params)
  if public_ips:
    try:
      login_ip = LocalState.get_login_host(options.keyname)
    except (IOError, BadConfigurationException):
      raise AppScaleException(
        "Couldn't get login ip for running deployment with keyname"
        " {}.".format(options.keyname))
    if login_ip not in public_ips:
      raise AppScaleException(
        "Couldn't recognize running instances for deployment with"
        " keyname {}.".format(options.keyname))

  if login_ip in public_ips:
    AppScaleLogger.log("Reusing already running instances.")
    # Get the node_info from the locations JSON.
    node_info = LocalState.get_local_nodes_info(keyname=options.keyname)
    previous_node_list = node_layout.from_locations_json_list(node_info)
    # If this is None, the AppScalefile has been changed or the nodes could
    # not be matched up by roles/jobs.
    if previous_node_list is None:
      raise BadConfigurationException("AppScale does not currently support "
                                      "changes to AppScalefile or locations "
                                      "JSON between a down and an up. If "
                                      "you would like to "
                                      "change the node layout use "
                                      "down --terminate before an up.")
    node_layout.nodes = previous_node_list

    # Re-associate each known node with its live IPs/instance ID.
    for node_index, node in enumerate(node_layout.nodes):
      try:
        index = instance_ids.index(node.instance_id)
      except ValueError:
        raise BadConfigurationException("Previous instance_id {} does not "
                                        "currently exist."
                                        .format(node.instance_id))
      node_layout.nodes[node_index].public_ip = public_ips[index]
      node_layout.nodes[node_index].private_ip = private_ips[index]
      node_layout.nodes[node_index].instance_id = instance_ids[index]
    return node_layout

  agent.configure_instance_security(params)

  # Group nodes by instance type; load balancers are kept separate so they
  # can be spawned first.
  load_balancer_roles = {}
  instance_type_roles = {}

  for node in node_layout.get_nodes('load_balancer', True):
    load_balancer_roles.setdefault(node.instance_type, []).append(node)

  for node in node_layout.get_nodes('load_balancer', False):
    # NOTE(review): 'instance_type' here aliases the instance_type_roles
    # dict (it is not an instance-type string) - confusing naming, but the
    # setdefault below still populates instance_type_roles as intended.
    instance_type = instance_type_roles
    instance_type.setdefault(node.instance_type, []).append(node)

  spawned_instance_ids = []

  for instance_type, load_balancer_nodes in load_balancer_roles.items():
    # Copy parameters so we can modify the instance type.
    instance_type_params = params.copy()
    instance_type_params['instance_type'] = instance_type

    try:
      instance_ids, public_ips, private_ips = cls.spawn_nodes_in_cloud(
        agent, instance_type_params, count=len(load_balancer_nodes),
        load_balancer=True)
    except (AgentRuntimeException, BotoServerError):
      AppScaleLogger.warn("AppScale was unable to start the requested number "
                          "of instances, attempting to terminate those that "
                          "were started.")
      if len(spawned_instance_ids) > 0:
        AppScaleLogger.warn("Attempting to terminate those that were "
                            "started.")
        cls.terminate_spawned_instances(spawned_instance_ids, agent, params)

      # Cleanup the keyname since it failed.
      LocalState.cleanup_keyname(options.keyname)

      # Re-raise the original exception.
      raise

    # Keep track of instances we have started.
    spawned_instance_ids.extend(instance_ids)

    # Write the fresh IPs/instance IDs back into the layout's nodes.
    for node_index, node in enumerate(load_balancer_nodes):
      index = node_layout.nodes.index(node)
      node_layout.nodes[index].public_ip = public_ips[node_index]
      node_layout.nodes[index].private_ip = private_ips[node_index]
      node_layout.nodes[index].instance_id = instance_ids[node_index]

  if options.static_ip:
    node = node_layout.head_node()
    agent.associate_static_ip(params, node.instance_id, options.static_ip)
    node.public_ip = options.static_ip
    AppScaleLogger.log("Static IP associated with head node.")

  AppScaleLogger.log("\nPlease wait for AppScale to prepare your machines "
                     "for use. This can take few minutes.")

  for instance_type, nodes in instance_type_roles.items():
    # Copy parameters so we can modify the instance type.
    instance_type_params = params.copy()
    instance_type_params['instance_type'] = instance_type

    try:
      _instance_ids, _public_ips, _private_ips = cls.spawn_nodes_in_cloud(
        agent, instance_type_params, count=len(nodes))
    except (AgentRuntimeException, BotoServerError):
      AppScaleLogger.warn("AppScale was unable to start the requested number "
                          "of instances, attempting to terminate those that "
                          "were started.")
      if len(spawned_instance_ids) > 0:
        AppScaleLogger.warn("Attempting to terminate those that were "
                            "started.")
        cls.terminate_spawned_instances(spawned_instance_ids, agent, params)

      # Cleanup the keyname since it failed.
      LocalState.cleanup_keyname(options.keyname)

      # Re-raise the original exception.
      raise

    # Keep track of instances we have started.
    spawned_instance_ids.extend(_instance_ids)

    for node_index, node in enumerate(nodes):
      index = node_layout.nodes.index(node)
      node_layout.nodes[index].public_ip = _public_ips[node_index]
      node_layout.nodes[index].private_ip = _private_ips[node_index]
      node_layout.nodes[index].instance_id = _instance_ids[node_index]

  return node_layout
def upload_app(cls, options):
    """Uploads the given App Engine application into AppScale.

    Args:
      options: A Namespace that has fields for each parameter that can be
        passed in via the command-line interface.
    Returns:
      A tuple containing the host and port where the application is serving
        traffic from.
    Raises:
      AppEngineConfigException: If the given file is not a tar.gz file, zip
        file, or directory.
      AppScaleException: If the app is owned by another user.
    """
    # Figure out where the app's code lives, unpacking archives if needed.
    if cls.TAR_GZ_REGEX.search(options.file):
        file_location = LocalState.extract_tgz_app_to_dir(options.file,
                                                          options.verbose)
        created_dir = True
    elif cls.ZIP_REGEX.search(options.file):
        file_location = LocalState.extract_zip_app_to_dir(options.file,
                                                          options.verbose)
        created_dir = True
    elif os.path.isdir(options.file):
        file_location = options.file
        created_dir = False
    else:
        raise AppEngineConfigException(
            "{0} is not a tar.gz file, a zip file, "
            "or a directory. Please try uploading either a tar.gz file, a zip "
            "file, or a directory.".format(options.file)
        )

    try:
        app_id = AppEngineHelper.get_app_id_from_app_config(file_location)
    except AppEngineConfigException:
        # Java App Engine users may have specified their war directory. In that
        # case, just move up one level, back to the app's directory.
        file_location = file_location + os.sep + ".."
        app_id = AppEngineHelper.get_app_id_from_app_config(file_location)

    app_language = AppEngineHelper.get_app_runtime_from_app_config(file_location)
    AppEngineHelper.validate_app_id(app_id)

    if app_language == "java":
        if AppEngineHelper.is_sdk_mismatch(file_location):
            AppScaleLogger.warn(
                "AppScale did not find the correct SDK jar " +
                "versions in your app. The current supported " +
                "SDK version is " + AppEngineHelper.SUPPORTED_SDK_VERSION + "."
            )

    acc = AppControllerClient(
        LocalState.get_login_host(options.keyname),
        LocalState.get_secret_key(options.keyname)
    )
    userappclient = UserAppClient(
        LocalState.get_login_host(options.keyname),
        LocalState.get_secret_key(options.keyname)
    )

    # Determine which user is uploading, creating the account if needed.
    if options.test:
        username = LocalState.DEFAULT_USER
    elif options.email:
        username = options.email
    else:
        username = LocalState.get_username_from_stdin(is_admin=False)

    if not userappclient.does_user_exist(username):
        password = LocalState.get_password_from_stdin()
        RemoteHelper.create_user_accounts(
            username, password,
            LocalState.get_login_host(options.keyname), options.keyname,
            clear_datastore=False
        )

    app_exists = userappclient.does_app_exist(app_id)
    app_admin = userappclient.get_app_admin(app_id)
    if app_admin is not None and username != app_admin:
        raise AppScaleException(
            "The given user doesn't own this application" +
            ", so they can't upload an app with that application ID. Please " +
            "change the application ID and try again."
        )

    if app_exists:
        AppScaleLogger.log("Uploading new version of app {0}".format(app_id))
    else:
        AppScaleLogger.log("Uploading initial version of app {0}".format(app_id))
        userappclient.reserve_app_id(username, app_id, app_language)

    # Ignore all .pyc files while tarring.
    if app_language == "python27":
        AppScaleLogger.log("Ignoring .pyc files")

    remote_file_path = RemoteHelper.copy_app_to_host(file_location,
                                                     options.keyname,
                                                     options.verbose)

    acc.done_uploading(app_id, remote_file_path)
    acc.update([app_id])

    # now that we've told the AppController to start our app, find out what port
    # the app is running on and wait for it to start serving
    AppScaleLogger.log("Please wait for your app to start serving.")

    if app_exists:
        time.sleep(20)  # give the AppController time to restart the app

    serving_host, serving_port = userappclient.get_serving_info(app_id,
                                                                options.keyname)
    RemoteHelper.sleep_until_port_is_open(serving_host, serving_port,
                                          options.verbose)
    AppScaleLogger.success(
        "Your app can be reached at the following URL: " +
        "http://{0}:{1}".format(serving_host, serving_port)
    )

    if created_dir:
        shutil.rmtree(file_location)

    return (serving_host, serving_port)
def run_instances(cls, options):
  """Starts a new AppScale deployment with the parameters given.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  Raises:
    AppControllerException: If the AppController on the head node crashes.
      When this occurs, the message in the exception contains the reason why
      the AppController crashed.
    BadConfigurationException: If the user passes in options that are not
      sufficient to start an AppScale deployment (e.g., running on EC2 but
      not specifying the AMI to use), or if the user provides us
      contradictory options (e.g., running on EC2 but not specifying EC2
      credentials).
  """
  LocalState.make_appscale_directory()
  LocalState.ensure_appscale_isnt_running(options.keyname, options.force)

  if options.infrastructure:
    # Running without persistent disks loses data on terminate - make the
    # user confirm unless they explicitly opted out of the prompt.
    if not options.disks and not options.test and not options.force:
      LocalState.ensure_user_wants_to_run_without_disks()
    AppScaleLogger.log("Starting AppScale " + APPSCALE_VERSION +
      " over the " + options.infrastructure + " cloud.")
  else:
    AppScaleLogger.log("Starting AppScale " + APPSCALE_VERSION +
      " over a virtualized cluster.")
  # Unique ID used to correlate this run in remote tools-state logging.
  my_id = str(uuid.uuid4())
  AppScaleLogger.remote_log_tools_state(options, my_id, "started",
    APPSCALE_VERSION)

  node_layout = NodeLayout(options)
  if not node_layout.is_valid():
    raise BadConfigurationException("There were errors with your " + \
      "placement strategy:\n{0}".format(str(node_layout.errors())))

  if not node_layout.is_supported():
    AppScaleLogger.warn("Warning: This deployment strategy is not " + \
      "officially supported.")

  public_ip, instance_id = RemoteHelper.start_head_node(
    options, my_id, node_layout)
  AppScaleLogger.log(
    "\nPlease wait for AppScale to prepare your machines " +
    "for use.")

  # Write our metadata as soon as possible to let users SSH into those
  # machines via 'appscale ssh'
  LocalState.update_local_metadata(options, node_layout, public_ip,
    instance_id)
  RemoteHelper.copy_local_metadata(public_ip, options.keyname,
    options.verbose)

  acc = AppControllerClient(public_ip,
    LocalState.get_secret_key(options.keyname))
  try:
    uaserver_host = acc.get_uaserver_host(options.verbose)
  except Exception:
    # The AppController crashed - surface its crashlog as the error message.
    message = RemoteHelper.collect_appcontroller_crashlog(
      public_ip, options.keyname, options.verbose)
    raise AppControllerException(message)

  RemoteHelper.sleep_until_port_is_open(uaserver_host, UserAppClient.PORT,
    options.verbose)

  # Update our metadata again so that users can SSH into other boxes that
  # may have been started.
  LocalState.update_local_metadata(options, node_layout, public_ip,
    instance_id)
  RemoteHelper.copy_local_metadata(public_ip, options.keyname,
    options.verbose)
  AppScaleLogger.log("UserAppServer is at {0}".format(uaserver_host))

  uaserver_client = UserAppClient(
    uaserver_host, LocalState.get_secret_key(options.keyname))

  # Pick the admin credentials: explicit flags win, then test defaults,
  # otherwise prompt the user interactively.
  if options.admin_user and options.admin_pass:
    AppScaleLogger.log("Using the provided admin username/password")
    username, password = options.admin_user, options.admin_pass
  elif options.test:
    AppScaleLogger.log("Using default admin username/password")
    username, password = LocalState.DEFAULT_USER, LocalState.DEFAULT_PASSWORD
  else:
    username, password = LocalState.get_credentials()

  RemoteHelper.create_user_accounts(username, password, uaserver_host,
    options.keyname, options.clear_datastore)
  uaserver_client.set_admin_role(username)

  RemoteHelper.wait_for_machines_to_finish_loading(
    public_ip, options.keyname)
  # Finally, update our metadata once we know that all of the machines are
  # up and have started all their API services.
  LocalState.update_local_metadata(options, node_layout, public_ip,
    instance_id)
  RemoteHelper.copy_local_metadata(public_ip, options.keyname,
    options.verbose)

  RemoteHelper.sleep_until_port_is_open(
    LocalState.get_login_host(options.keyname),
    RemoteHelper.APP_DASHBOARD_PORT, options.verbose)
  AppScaleLogger.success("AppScale successfully started!")
  AppScaleLogger.success("View status information about your AppScale " + \
    "deployment at http://{0}:{1}/status".format(LocalState.get_login_host(
    options.keyname), RemoteHelper.APP_DASHBOARD_PORT))
  AppScaleLogger.remote_log_tools_state(options, my_id, "finished",
    APPSCALE_VERSION)
def upload_app(cls, options):
  """Uploads the given App Engine application into AppScale.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  Returns:
    A tuple containing the host and port where the application is serving
      traffic from.
  Raises:
    AppEngineConfigException: If the given file is not a tar.gz file, a zip
      file, or a directory, or if no valid app config can be found.
    AppScaleException: If the given user does not own the application ID
      being uploaded.
  """
  # Accept a tar.gz, a zip, or a plain directory. For archives we extract
  # into a temp dir and remember to clean it up at the end (created_dir).
  if cls.TAR_GZ_REGEX.search(options.file):
    file_location = LocalState.extract_tgz_app_to_dir(options.file,
      options.verbose)
    created_dir = True
  elif cls.ZIP_REGEX.search(options.file):
    file_location = LocalState.extract_zip_app_to_dir(options.file,
      options.verbose)
    created_dir = True
  elif os.path.isdir(options.file):
    file_location = options.file
    created_dir = False
  else:
    raise AppEngineConfigException('{0} is not a tar.gz file, a zip file, ' \
      'or a directory. Please try uploading either a tar.gz file, a zip ' \
      'file, or a directory.'.format(options.file))

  try:
    app_id = AppEngineHelper.get_app_id_from_app_config(file_location)
  except AppEngineConfigException:
    # Java App Engine users may have specified their war directory. In that
    # case, just move up one level, back to the app's directory.
    file_location = file_location + os.sep + ".."
    app_id = AppEngineHelper.get_app_id_from_app_config(file_location)

  app_language = AppEngineHelper.get_app_runtime_from_app_config(
    file_location)
  AppEngineHelper.validate_app_id(app_id)

  # Warn (but do not abort) when a Java app bundles SDK jars that don't
  # match the version AppScale supports.
  if app_language == 'java':
    if AppEngineHelper.is_sdk_mismatch(file_location):
      AppScaleLogger.warn('AppScale did not find the correct SDK jar ' +
        'versions in your app. The current supported ' +
        'SDK version is ' + AppEngineHelper.SUPPORTED_SDK_VERSION + '.')

  acc = AppControllerClient(LocalState.get_login_host(options.keyname),
    LocalState.get_secret_key(options.keyname))
  userappserver_host = acc.get_uaserver_host(options.verbose)
  userappclient = UserAppClient(userappserver_host,
    LocalState.get_secret_key(options.keyname))

  # Resolve the user who will own this app: --test uses the default user,
  # --email uses the given address, otherwise prompt on stdin.
  if options.test:
    username = LocalState.DEFAULT_USER
  elif options.email:
    username = options.email
  else:
    username = LocalState.get_username_from_stdin(is_admin=False)

  if not userappclient.does_user_exist(username):
    password = LocalState.get_password_from_stdin()
    RemoteHelper.create_user_accounts(username, password,
      userappserver_host, options.keyname, clear_datastore=False)

  # Only the app's admin may upload new versions of an existing app.
  app_exists = userappclient.does_app_exist(app_id)
  app_admin = userappclient.get_app_admin(app_id)
  if app_admin is not None and username != app_admin:
    raise AppScaleException("The given user doesn't own this application" + \
      ", so they can't upload an app with that application ID. Please " + \
      "change the application ID and try again.")

  if app_exists:
    AppScaleLogger.log("Uploading new version of app {0}".format(app_id))
  else:
    AppScaleLogger.log("Uploading initial version of app {0}".format(app_id))
    # Reserve the app ID before first upload so no one else can claim it.
    userappclient.reserve_app_id(username, app_id, app_language)

  remote_file_path = RemoteHelper.copy_app_to_host(file_location,
    options.keyname, options.verbose)
  acc.done_uploading(app_id, remote_file_path)
  acc.update([app_id])

  # now that we've told the AppController to start our app, find out what port
  # the app is running on and wait for it to start serving
  AppScaleLogger.log("Please wait for your app to start serving.")

  if app_exists:
    time.sleep(20)  # give the AppController time to restart the app

  serving_host, serving_port = userappclient.get_serving_info(app_id,
    options.keyname)
  RemoteHelper.sleep_until_port_is_open(serving_host, serving_port,
    options.verbose)
  AppScaleLogger.success("Your app can be reached at the following URL: " +
    "http://{0}:{1}".format(serving_host, serving_port))

  # Remove the temporary extraction directory, if we created one.
  if created_dir:
    shutil.rmtree(file_location)

  return (serving_host, serving_port)
def start_head_node(cls, options, my_id, node_layout):
  """Starts the first node in an AppScale deployment and instructs it to start
  API services on its own node, as well as the other nodes in the deployment.

  This includes spawning the first node in the deployment, copying over all
  deployment-specific files to it, and starting its AppController service.

  Args:
    options: A Namespace that includes parameters passed in by the user that
      define non-placement-strategy-related deployment options (e.g., keypair
      names, security group names).
    my_id: A str that is used to uniquely identify this AppScale deployment
      with the remote start application.
    node_layout: A NodeLayout that describes the placement strategy that
      should be used for this AppScale deployment.
  Returns:
    The public IP and instance ID (a dummy value in non-cloud deployments)
    corresponding to the node that was started.
  Raises:
    AppControllerException: If the AppController on the head node crashes.
      The message in this exception indicates why the crash occurred.
  """
  secret_key = LocalState.generate_secret_key(options.keyname)
  AppScaleLogger.verbose("Secret key is {0}".format(secret_key),
    options.verbose)
  head_node = node_layout.head_node().public_ip

  AppScaleLogger.log("Log in to your head node: ssh -i {0} root@{1}".format(
    LocalState.get_key_path_from_name(options.keyname), head_node))

  # Collect agent-specific parameters (credentials, spot price, region)
  # that must be forwarded to the AppController for cloud deployments.
  additional_params = {}
  if options.infrastructure:
    agent = InfrastructureAgentFactory.create_agent(options.infrastructure)
    params = agent.get_params_from_args(options)
    additional_params = {}

    if agent.PARAM_CREDENTIALS in params:
      additional_params = params[agent.PARAM_CREDENTIALS]

    if options.use_spot_instances:
      additional_params[agent.PARAM_SPOT_PRICE] = \
        str(params[agent.PARAM_SPOT_PRICE])

    if agent.PARAM_REGION in params:
      additional_params[agent.PARAM_REGION] = params[agent.PARAM_REGION]

  time.sleep(10)  # gives machines in cloud extra time to boot up

  cls.copy_deployment_credentials(head_node, options)
  cls.run_user_commands(head_node, options.user_commands, options.keyname,
    options.verbose)
  cls.start_remote_appcontroller(head_node, options.keyname, options.verbose)
  AppScaleLogger.log("Head node successfully initialized at {0}.".format(
    head_node))
  AppScaleLogger.remote_log_tools_state(options, my_id, "started head node",
    APPSCALE_VERSION)

  # Construct serverside compatible parameters.
  deployment_params = LocalState.generate_deployment_params(
    options, node_layout, additional_params)
  AppScaleLogger.verbose(str(LocalState.obscure_dict(deployment_params)),
    options.verbose)

  acc = AppControllerClient(head_node, secret_key)
  try:
    acc.set_parameters(node_layout.to_list(), deployment_params)
  except Exception as exception:
    # If the AppController rejects/crashes on set_parameters, surface its
    # crashlog so the user sees why the deployment failed.
    AppScaleLogger.warn('Saw Exception while setting AC parameters: {0}'.
      format(str(exception)))
    message = RemoteHelper.collect_appcontroller_crashlog(
      head_node, options.keyname, options.verbose)
    raise AppControllerException(message)
def run_upgrade_script(cls, options, node_layout):
  """ Runs the upgrade script which checks for any upgrades needed to be
  performed.

  The script is launched over SSH in a background thread; this method then
  polls a remote JSON status file until the script reports completion or an
  error.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
    node_layout: A NodeLayout object for the deployment.
  Raises:
    AppScaleException: If the remote status log is malformed, or if the
      upgrade script reports a status other than 'complete' or 'inProgress'.
  """
  # Timestamp doubles as the log postfix and the status-file name suffix.
  timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')

  db_ips = [node.private_ip for node in node_layout.nodes
            if node.is_role('db_master') or node.is_role('db_slave')]
  zk_ips = [node.private_ip for node in node_layout.nodes
            if node.is_role('zookeeper')]

  upgrade_script_command = '{script} --keyname {keyname} '\
    '--log-postfix {timestamp} '\
    '--db-master {db_master} '\
    '--zookeeper {zk_ips} '\
    '--database {db_ips} '\
    '--replication {replication}'.format(
      script=cls.UPGRADE_SCRIPT,
      keyname=options.keyname,
      timestamp=timestamp,
      db_master=node_layout.db_master().private_ip,
      zk_ips=' '.join(zk_ips),
      db_ips=' '.join(db_ips),
      replication=node_layout.replication
    )
  master_public_ip = node_layout.head_node().public_ip

  AppScaleLogger.log("Running upgrade script to check if any other upgrade "
                     "is needed.")
  # Run the upgrade command as a background process.
  error_bucket = Queue.Queue()
  threading.Thread(
    target=async_layout_upgrade,
    args=(master_public_ip, options.keyname, upgrade_script_command,
          error_bucket, options.verbose)
  ).start()

  last_message = None
  while True:
    # Check if the SSH thread has crashed.
    try:
      ssh_error = error_bucket.get(block=False)
      AppScaleLogger.warn('Error executing upgrade script')
      LocalState.generate_crash_log(ssh_error, traceback.format_exc())
    except Queue.Empty:
      pass

    # Poll the status file the upgrade script writes on the head node.
    # NOTE(review): json.loads will raise if the file does not exist yet or
    # is partially written — presumably the script creates it promptly;
    # confirm against the upgrade script's behavior.
    upgrade_status_file = cls.UPGRADE_STATUS_FILE_LOC + timestamp + ".json"
    command = 'cat' + " " + upgrade_status_file
    upgrade_status = RemoteHelper.ssh(
      master_public_ip, options.keyname, command, options.verbose)
    json_status = json.loads(upgrade_status)

    if 'status' not in json_status or 'message' not in json_status:
      raise AppScaleException('Invalid status log format')

    if json_status['status'] == 'complete':
      AppScaleLogger.success(json_status['message'])
      break

    if json_status['status'] == 'inProgress':
      # Only log progress messages when they change, to avoid spam.
      if json_status['message'] != last_message:
        AppScaleLogger.log(json_status['message'])
        last_message = json_status['message']
      time.sleep(cls.SLEEP_TIME)
      continue

    # Assume the message is an error.
    AppScaleLogger.warn(json_status['message'])
    raise AppScaleException(json_status['message'])
def start_head_node(cls, options, my_id, node_layout):
  """Starts the first node in an AppScale deployment and instructs it to start
  API services on its own node, as well as the other nodes in the deployment.

  This includes spawning the first node in the deployment, copying over all
  deployment-specific files to it, and starting its AppController service.

  Args:
    options: A Namespace that includes parameters passed in by the user that
      define non-placement-strategy-related deployment options (e.g., keypair
      names, security group names).
    my_id: A str that is used to uniquely identify this AppScale deployment
      with the remote start application.
    node_layout: A NodeLayout that describes the placement strategy that
      should be used for this AppScale deployment.
  Returns:
    The public IP and instance ID (a dummy value in non-cloud deployments)
    corresponding to the node that was started.
  Raises:
    AppControllerException: If the AppController on the head node crashes.
      The message in this exception indicates why the crash occurred.
    AppScaleException: If the head node machine is not compatible with this
      version of AppScale.
  """
  secret_key = LocalState.generate_secret_key(options.keyname)
  AppScaleLogger.verbose("Secret key is {0}".format(secret_key),
    options.verbose)

  # In cloud deployments we spawn the head node ourselves; otherwise the
  # head node is a pre-existing machine named in the node layout.
  if options.infrastructure:
    instance_id, public_ip, private_ip = cls.spawn_node_in_cloud(options)
  else:
    instance_id = cls.DUMMY_INSTANCE_ID
    public_ip = node_layout.head_node().public_ip
    private_ip = node_layout.head_node().private_ip

  AppScaleLogger.log(
    "Log in to your head node: ssh -i {0} root@{1}".format(
      LocalState.get_key_path_from_name(options.keyname), public_ip))

  try:
    cls.ensure_machine_is_compatible(public_ip, options.keyname,
      options.table, options.verbose)
  except AppScaleException as ase:
    # On failure shutdown the cloud instances, cleanup the keys, but only
    # if --test is not set.
    if options.infrastructure:
      if not options.test:
        try:
          cls.terminate_cloud_instance(instance_id, options)
        except Exception as tcie:
          # Best-effort cleanup: report, but still raise the original error.
          AppScaleLogger.log(
            "Error terminating instances: {0}".format(str(tcie)))
      raise AppScaleException("{0} Please ensure that the "\
        "image {1} has AppScale {2} installed on it."
        .format(str(ase), options.machine, APPSCALE_VERSION))
    else:
      raise AppScaleException("{0} Please login to that machine and ensure "\
        "that AppScale {1} is installed on it."
        .format(str(ase), APPSCALE_VERSION))

  if options.scp:
    AppScaleLogger.log(
      "Copying over local copy of AppScale from {0}".format(options.scp))
    cls.rsync_files(public_ip, options.keyname, options.scp, options.verbose)

  # On Euca, we've seen issues where attaching the EBS volume right after
  # the instance starts doesn't work. This sleep lets the instance fully
  # start up and get volumes attached to it correctly.
  if options.infrastructure and options.infrastructure == 'euca' and \
    options.disks:
    time.sleep(30)

  # Collect agent-specific parameters (credentials, spot price, region)
  # that must be forwarded to the AppController for cloud deployments.
  if options.infrastructure:
    agent = InfrastructureAgentFactory.create_agent(options.infrastructure)
    params = agent.get_params_from_args(options)
    additional_params = {}

    if agent.PARAM_CREDENTIALS in params:
      additional_params = params[agent.PARAM_CREDENTIALS]

    if options.use_spot_instances:
      additional_params[agent.PARAM_SPOT_PRICE] = \
        str(params[agent.PARAM_SPOT_PRICE])

    if agent.PARAM_REGION in params:
      additional_params[agent.PARAM_REGION] = params[agent.PARAM_REGION]
  else:
    additional_params = {}

  deployment_params = LocalState.generate_deployment_params(
    options, node_layout, public_ip, additional_params)
  AppScaleLogger.verbose(str(LocalState.obscure_dict(deployment_params)),
    options.verbose)
  AppScaleLogger.log(
    "Head node successfully initialized at {0}.".format(public_ip))
  AppScaleLogger.remote_log_tools_state(options, my_id, "started head node",
    APPSCALE_VERSION)
  time.sleep(10)  # gives machines in cloud extra time to boot up

  cls.copy_deployment_credentials(public_ip, options)
  cls.run_user_commands(public_ip, options.user_commands, options.keyname,
    options.verbose)
  cls.start_remote_appcontroller(public_ip, options.keyname, options.verbose)

  acc = AppControllerClient(public_ip, secret_key)
  locations = [{
    'public_ip': public_ip,
    'private_ip': private_ip,
    'jobs': node_layout.head_node().roles,
    'instance_id': instance_id,
    'disk': node_layout.head_node().disk
  }]
  try:
    acc.set_parameters(locations, LocalState.map_to_array(deployment_params))
  except Exception as exception:
    # If the AppController rejects/crashes on set_parameters, surface its
    # crashlog so the user sees why the deployment failed.
    AppScaleLogger.warn('Saw Exception while setting AC parameters: {0}' \
      .format(str(exception)))
    message = RemoteHelper.collect_appcontroller_crashlog(
      public_ip, options.keyname, options.verbose)
    raise AppControllerException(message)

  return public_ip, instance_id
def gather_logs(cls, options):
  """Collects logs from each machine in the currently running AppScale
  deployment.

  Logs from a fixed set of well-known paths are scp'd from every node into
  a per-IP subdirectory of options.location. Individual scp failures are
  reported but do not abort the collection.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  Raises:
    AppScaleException: If the location that logs should be copied to
      already exists.
  """
  # First, make sure that the place we want to store logs doesn't
  # already exist.
  if os.path.exists(options.location):
    raise AppScaleException(
      "Can't gather logs, as the location you " +
      "specified, {0}, already exists.".format(options.location)
    )

  acc = AppControllerClient(
    LocalState.get_login_host(options.keyname),
    LocalState.get_secret_key(options.keyname)
  )

  try:
    all_ips = acc.get_all_public_ips()
  except socket.error:
    # Occurs when the AppController has failed.
    AppScaleLogger.warn(
      "Couldn't get an up-to-date listing of the " +
      "machines in this AppScale deployment. Using our locally cached " +
      "info instead."
    )
    all_ips = LocalState.get_all_public_ips(options.keyname)

  # do the mkdir after we get the secret key, so that a bad keyname will
  # cause the tool to crash and not create this directory
  # Fix: os.makedirs instead of os.mkdir, so a nested target path (e.g.
  # ~/logs/run-1) works even when its parent directory doesn't exist yet.
  # For a single-level path the behavior is unchanged.
  os.makedirs(options.location)

  # The log paths that we collect logs from.
  log_paths = [
    "/var/log/appscale",
    "/var/log/kern.log*",
    "/var/log/monit.log*",
    "/var/log/nginx",
    "/var/log/syslog*",
    "/var/log/zookeeper",
  ]

  failures = False
  for ip in all_ips:
    # Get the logs from each node, and store them in our local directory
    local_dir = "{0}/{1}".format(options.location, ip)
    os.mkdir(local_dir)

    for log_path in log_paths:
      try:
        RemoteHelper.scp_remote_to_local(ip, options.keyname, log_path,
                                         local_dir, options.verbose)
      except ShellException as shell_exception:
        # Best-effort: keep collecting from other paths/hosts, but remember
        # that something failed so we can tell the user at the end.
        failures = True
        AppScaleLogger.warn("Unable to collect logs from '{}' "
                            "for host '{}'".format(log_path, ip))
        AppScaleLogger.verbose(
          "Encountered exception: {}".format(str(shell_exception)),
          options.verbose)

  if failures:
    AppScaleLogger.log(
      "Done copying to {0}. There were "
      "failures while collecting AppScale logs.".format(options.location)
    )
  else:
    AppScaleLogger.success("Successfully collected all AppScale logs into "
                           "{0}".format(options.location))
def terminate_virtualized_cluster(cls, keyname, clean, is_verbose):
  """Stops all API services running on all nodes in the currently running
  AppScale deployment.

  Asks the shadow node's AppController to terminate the deployment, streams
  per-node status messages back while termination is in progress, and
  finally stops the AppController on the shadow node itself.

  Args:
    keyname: The name of the SSH keypair used for this AppScale deployment.
    is_verbose: A bool that indicates if we should print the commands executed
      to stdout.
    clean: A bool representing whether clean should be ran on the nodes.
  Raises:
    AppScaleException: If the secret key cannot be found, or if one or more
      nodes failed to stop AppScale.
  """
  AppScaleLogger.log("Stopping appscale deployment with keyname {0}"
                     .format(keyname))
  time.sleep(2)

  shadow_host = LocalState.get_host_with_role(keyname, 'shadow')
  try:
    secret = LocalState.get_secret_key(keyname)
  except IOError:
    # We couldn't find the secret key: AppScale is most likely not
    # running.
    raise AppScaleException("Couldn't find AppScale secret key.")

  acc = AppControllerClient(shadow_host, secret)
  try:
    # Count every node except the shadow/head node, which is stopped
    # separately via stop_remote_appcontroller below. Each node that
    # reports success decrements this counter.
    machines = len(acc.get_all_public_ips()) - 1
    acc.run_terminate(clean)
    terminated_successfully = True
    log_dump = u""
    while not acc.is_appscale_terminated():
      # For terminate receive_server_message will return a JSON string that
      # is a list of dicts with keys: ip, status, output
      try:
        output_list = yaml.safe_load(acc.receive_server_message())
      except Exception as e:
        # NOTE(review): e.message is Python 2 only; this file already uses
        # other Python 2 constructs (e.g. raw_input).
        log_dump += e.message
        continue
      for node in output_list:
        if node.get("status"):
          machines -= 1
          AppScaleLogger.success("Node at {node_ip}: {status}".format(
            node_ip=node.get("ip"), status="Stopping AppScale finished"))
        else:
          AppScaleLogger.warn("Node at {node_ip}: {status}".format(
            node_ip=node.get("ip"), status="Stopping AppScale failed"))
          terminated_successfully = False
          log_dump += u"Node at {node_ip}: {status}\nNode Output:"\
                      u"{output}".format(node_ip=node.get("ip"),
                                         status="Stopping AppScale failed",
                                         output=node.get("output"))
        AppScaleLogger.verbose(u"Output of node at {node_ip}:\n"
                               u"{output}".format(node_ip=node.get("ip"),
                                                  output=node.get("output")),
                               is_verbose)
    # machines > 0 means some node never reported success.
    if not terminated_successfully or machines > 0:
      LocalState.generate_crash_log(AppControllerException, log_dump)
      raise AppScaleException("{0} node(s) failed stopping AppScale, "
                              "head node is still running AppScale services."
                              .format(machines))

    # All workers are down; now stop the AppController on the shadow node.
    cls.stop_remote_appcontroller(shadow_host, keyname, is_verbose, clean)
  except socket.error as socket_error:
    AppScaleLogger.warn(u'Unable to talk to AppController: {}'.
                        format(socket_error.message))
    raise
  except Exception as exception:
    AppScaleLogger.verbose(u'Saw Exception while stopping AppScale {0}'.
                           format(str(exception)), is_verbose)
    raise
def run_instances(cls, options):
  """Starts a new AppScale deployment with the parameters given.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  Raises:
    AppControllerException: If the AppController on the head node crashes.
      When this occurs, the message in the exception contains the reason why
      the AppController crashed.
    BadConfigurationException: If the user passes in options that are not
      sufficient to start an AppScale deployment (e.g., running on EC2 but
      not specifying the AMI to use), or if the user provides us
      contradictory options (e.g., running on EC2 but not specifying EC2
      credentials).
  """
  LocalState.make_appscale_directory()
  LocalState.ensure_appscale_isnt_running(options.keyname, options.force)
  if options.infrastructure:
    # Without persistent disks, cloud data is lost on termination; make the
    # user confirm unless running in test/force mode.
    if not options.disks and not options.test and not options.force:
      LocalState.ensure_user_wants_to_run_without_disks()

  # Only advertise major.minor of the version string.
  reduced_version = '.'.join(x for x in APPSCALE_VERSION.split('.')[:2])
  AppScaleLogger.log("Starting AppScale " + reduced_version)

  my_id = str(uuid.uuid4())
  AppScaleLogger.remote_log_tools_state(options, my_id, "started",
    APPSCALE_VERSION)

  node_layout = NodeLayout(options)
  if not node_layout.is_valid():
    raise BadConfigurationException("There were errors with your " + \
      "placement strategy:\n{0}".format(str(node_layout.errors())))

  head_node = node_layout.head_node()
  # Start VMs in cloud via cloud agent.
  if options.infrastructure:
    instance_ids, public_ips, private_ips = RemoteHelper.start_all_nodes(
      options, len(node_layout.nodes))
    AppScaleLogger.log("\nPlease wait for AppScale to prepare your machines "
                       "for use. This can take few minutes.")

    # Set newly obtained node layout info for this deployment.
    for i, _ in enumerate(instance_ids):
      node_layout.nodes[i].public_ip = public_ips[i]
      node_layout.nodes[i].private_ip = private_ips[i]
      node_layout.nodes[i].instance_id = instance_ids[i]

    # Enables root logins and SSH access on the head node.
    RemoteHelper.enable_root_ssh(options, head_node.public_ip)
  AppScaleLogger.verbose("Node Layout: {}".format(node_layout.to_list()),
                         options.verbose)

  # Ensure all nodes are compatible.
  RemoteHelper.ensure_machine_is_compatible(
    head_node.public_ip, options.keyname, options.verbose)

  # Use rsync to move custom code into the deployment.
  if options.scp:
    AppScaleLogger.log("Copying over local copy of AppScale from {0}".
                       format(options.scp))
    RemoteHelper.rsync_files(head_node.public_ip, options.keyname,
                             options.scp, options.verbose)

  # Start services on head node.
  RemoteHelper.start_head_node(options, my_id, node_layout)

  # Write deployment metadata to disk (facilitates SSH operations, etc.)
  db_master = node_layout.db_master().private_ip
  # NOTE: head_node is rebound here from a node object to its public IP str.
  head_node = node_layout.head_node().public_ip
  LocalState.update_local_metadata(options, db_master, head_node)

  # Copy the locations.json to the head node
  RemoteHelper.copy_local_metadata(node_layout.head_node().public_ip,
                                   options.keyname, options.verbose)

  # Wait for services on head node to start.
  secret_key = LocalState.get_secret_key(options.keyname)
  acc = AppControllerClient(head_node, secret_key)
  try:
    while not acc.is_initialized():
      AppScaleLogger.log('Waiting for head node to initialize...')
      # This can take some time in particular the first time around, since
      # we will have to initialize the database.
      time.sleep(cls.SLEEP_TIME*3)
  except socket.error as socket_error:
    AppScaleLogger.warn('Unable to initialize AppController: {}'.
                        format(socket_error.message))
    message = RemoteHelper.collect_appcontroller_crashlog(
      head_node, options.keyname, options.verbose)
    raise AppControllerException(message)

  # Set up admin account.
  try:
    # We don't need to have any exception information here: we do expect
    # some anyway while the UserAppServer is coming up.
    acc.does_user_exist("non-existent-user", True)
  except Exception:
    AppScaleLogger.log('UserAppServer not ready yet. Retrying ...')
    time.sleep(cls.SLEEP_TIME)

  if options.admin_user and options.admin_pass:
    AppScaleLogger.log("Using the provided admin username/password")
    username, password = options.admin_user, options.admin_pass
  elif options.test:
    AppScaleLogger.log("Using default admin username/password")
    username, password = LocalState.DEFAULT_USER, LocalState.DEFAULT_PASSWORD
  else:
    username, password = LocalState.get_credentials()

  RemoteHelper.create_user_accounts(username, password, head_node,
                                    options.keyname)
  acc.set_admin_role(username, 'true', cls.ADMIN_CAPABILITIES)

  # Wait for machines to finish loading and AppScale Dashboard to be
  # deployed.
  RemoteHelper.wait_for_machines_to_finish_loading(head_node,
                                                   options.keyname)
  RemoteHelper.sleep_until_port_is_open(LocalState.get_login_host(
    options.keyname), RemoteHelper.APP_DASHBOARD_PORT, options.verbose)

  AppScaleLogger.success("AppScale successfully started!")
  AppScaleLogger.success("View status information about your AppScale " + \
    "deployment at http://{0}:{1}".format(LocalState.get_login_host(
    options.keyname), RemoteHelper.APP_DASHBOARD_PORT))
  AppScaleLogger.remote_log_tools_state(options, my_id, "finished",
    APPSCALE_VERSION)
def start_head_node(cls, options, my_id, node_layout):
  """Starts the first node in an AppScale deployment and instructs it to start
  API services on its own node, as well as the other nodes in the deployment.

  This includes spawning the first node in the deployment, copying over all
  deployment-specific files to it, and starting its AppController service.

  Args:
    options: A Namespace that includes parameters passed in by the user that
      define non-placement-strategy-related deployment options (e.g., keypair
      names, security group names).
    my_id: A str that is used to uniquely identify this AppScale deployment
      with the remote start application.
    node_layout: A NodeLayout that describes the placement strategy that
      should be used for this AppScale deployment.
  Returns:
    The public IP and instance ID (a dummy value in non-cloud deployments)
    corresponding to the node that was started.
  Raises:
    AppControllerException: If the AppController on the head node crashes.
      The message in this exception indicates why the crash occurred.
    AppScaleException: If the head node machine is not compatible with this
      version of AppScale.
  """
  secret_key = LocalState.generate_secret_key(options.keyname)
  AppScaleLogger.verbose("Secret key is {0}".format(secret_key),
    options.verbose)

  # In cloud deployments we spawn the head node ourselves; otherwise the
  # head node is a pre-existing machine named in the node layout.
  if options.infrastructure:
    instance_id, public_ip, private_ip = cls.spawn_node_in_cloud(options)
  else:
    instance_id = cls.DUMMY_INSTANCE_ID
    public_ip = node_layout.head_node().public_ip
    private_ip = node_layout.head_node().private_ip

  AppScaleLogger.log("Log in to your head node: ssh -i {0} root@{1}".format(
    LocalState.get_key_path_from_name(options.keyname), public_ip))

  try:
    cls.ensure_machine_is_compatible(public_ip, options.keyname,
      options.table, options.verbose)
  except AppScaleException as ase:
    # On failure shutdown the cloud instances, cleanup the keys, but only
    # if --test is not set.
    if options.infrastructure:
      if not options.test:
        try:
          cls.terminate_cloud_instance(instance_id, options)
        except Exception as tcie:
          # Best-effort cleanup: report, but still raise the original error.
          AppScaleLogger.log("Error terminating instances: {0}"
            .format(str(tcie)))
      raise AppScaleException("{0} Please ensure that the "\
        "image {1} has AppScale {2} installed on it."
        .format(str(ase), options.machine, APPSCALE_VERSION))
    else:
      raise AppScaleException("{0} Please login to that machine and ensure "\
        "that AppScale {1} is installed on it."
        .format(str(ase), APPSCALE_VERSION))

  if options.scp:
    AppScaleLogger.log("Copying over local copy of AppScale from {0}".format(
      options.scp))
    cls.rsync_files(public_ip, options.keyname, options.scp, options.verbose)

  # On Euca, we've seen issues where attaching the EBS volume right after
  # the instance starts doesn't work. This sleep lets the instance fully
  # start up and get volumes attached to it correctly.
  if options.infrastructure and options.infrastructure == 'euca' and \
    options.disks:
    time.sleep(30)

  # Collect agent-specific parameters (credentials, spot price, region)
  # that must be forwarded to the AppController for cloud deployments.
  if options.infrastructure:
    agent = InfrastructureAgentFactory.create_agent(options.infrastructure)
    params = agent.get_params_from_args(options)
    additional_params = {}

    if agent.PARAM_CREDENTIALS in params:
      additional_params = params[agent.PARAM_CREDENTIALS]

    if options.use_spot_instances:
      additional_params[agent.PARAM_SPOT_PRICE] = \
        str(params[agent.PARAM_SPOT_PRICE])

    if agent.PARAM_REGION in params:
      additional_params[agent.PARAM_REGION] = params[agent.PARAM_REGION]
  else:
    additional_params = {}

  deployment_params = LocalState.generate_deployment_params(options,
    node_layout, public_ip, additional_params)
  AppScaleLogger.verbose(str(LocalState.obscure_dict(deployment_params)),
    options.verbose)
  AppScaleLogger.log("Head node successfully initialized at {0}.".format(
    public_ip))
  AppScaleLogger.remote_log_tools_state(options, my_id, "started head node",
    APPSCALE_VERSION)
  time.sleep(10)  # gives machines in cloud extra time to boot up

  cls.copy_deployment_credentials(public_ip, options)
  cls.run_user_commands(public_ip, options.user_commands, options.keyname,
    options.verbose)
  cls.start_remote_appcontroller(public_ip, options.keyname, options.verbose)

  acc = AppControllerClient(public_ip, secret_key)
  locations = [{'public_ip' : public_ip, 'private_ip' : private_ip,
    'jobs' : node_layout.head_node().roles,
    'instance_id' : instance_id, 'disk' : node_layout.head_node().disk}]
  try:
    acc.set_parameters(locations, LocalState.map_to_array(deployment_params))
  except Exception as exception:
    # If the AppController rejects/crashes on set_parameters, surface its
    # crashlog so the user sees why the deployment failed.
    AppScaleLogger.warn('Saw Exception while setting AC parameters: {0}' \
      .format(str(exception)))
    message = RemoteHelper.collect_appcontroller_crashlog(public_ip,
      options.keyname, options.verbose)
    raise AppControllerException(message)

  return public_ip, instance_id