def copy_app_to_host(cls, app_location, keyname, is_verbose):
  """Copies the given application to a machine running the Login service
  within an AppScale deployment.

  Args:
    app_location: The location on the local filesystem where the application
      can be found.
    keyname: The name of the SSH keypair that uniquely identifies this
      AppScale deployment.
    is_verbose: A bool that indicates if we should print the commands we exec
      to copy the app to the remote host to stdout.
  Returns:
    A str corresponding to the location on the remote filesystem where the
    application was copied to.
  """
  app_id = AppEngineHelper.get_app_id_from_app_config(app_location)

  AppScaleLogger.log("Tarring application")
  rand = str(uuid.uuid4()).replace('-', '')[:8]
  local_tarred_app = "{0}/appscale-app-{1}-{2}.tar.gz".format(
    tempfile.gettempdir(), app_id, rand)
  cmd = "cd '{0}' && COPYFILE_DISABLE=1 tar -czhf {1} --exclude='*.pyc' *".format(
    app_location, local_tarred_app)
  LocalState.shell(cmd, is_verbose)

  AppScaleLogger.log("Copying over application")
  remote_app_tar = "{0}/{1}.tar.gz".format(cls.REMOTE_APP_DIR, app_id)
  cls.scp(LocalState.get_login_host(keyname), keyname, local_tarred_app,
    remote_app_tar, is_verbose)

  AppScaleLogger.verbose("Removing local copy of tarred application",
    is_verbose)
  os.remove(local_tarred_app)
  return remote_app_tar
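# A minimal standalone sketch of the packaging step above, assuming only an
# app directory on disk (the path and app id are illustrative).
# COPYFILE_DISABLE=1 keeps macOS tar from adding ._* AppleDouble entries to
# the archive, and -h dereferences symlinks so linked sources ship by content.
import subprocess
import tempfile
import uuid

app_location = "/tmp/guestbook"  # hypothetical app directory
app_id = "guestbook"             # hypothetical project id

rand = uuid.uuid4().hex[:8]
tarball = "{0}/appscale-app-{1}-{2}.tar.gz".format(
  tempfile.gettempdir(), app_id, rand)
cmd = "cd '{0}' && COPYFILE_DISABLE=1 tar -czhf {1} --exclude='*.pyc' *".format(
  app_location, tarball)
subprocess.check_call(cmd, shell=True)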
def terminate_virtualized_cluster(cls, keyname, is_verbose):
  """Stops all API services running on all nodes in the currently running
  AppScale deployment.

  Args:
    keyname: The name of the SSH keypair used for this AppScale deployment.
    is_verbose: A bool that indicates if we should print the commands executed
      to stdout.
  """
  AppScaleLogger.log(
    "Terminating AppScale deployment with keyname {0}".format(keyname))
  time.sleep(2)

  shadow_host = LocalState.get_host_with_role(keyname, 'shadow')
  try:
    secret = LocalState.get_secret_key(keyname)
  except IOError:
    # We couldn't find the secret key: AppScale is most likely not running.
    raise AppScaleException("Couldn't find AppScale secret key.")

  acc = AppControllerClient(shadow_host, secret)
  try:
    all_ips = acc.get_all_public_ips()
  except Exception as exception:
    AppScaleLogger.warn(
      'Saw Exception while getting deployment IPs: {0}'.format(
        str(exception)))
    all_ips = LocalState.get_all_public_ips(keyname)

  threads = []
  for ip in all_ips:
    thread = threading.Thread(target=cls.stop_remote_appcontroller,
      args=(ip, keyname, is_verbose))
    thread.start()
    threads.append(thread)
  for thread in threads:
    thread.join()

  boxes_shut_down = 0
  is_running_regex = re.compile("appscale-controller stop")
  for ip in all_ips:
    AppScaleLogger.log(
      "Shutting down AppScale API services at {0}".format(ip))
    while True:
      remote_output = cls.ssh(ip, keyname, 'ps x', is_verbose)
      AppScaleLogger.verbose(remote_output, is_verbose)
      # re.match only anchors at the start of the output, so search for the
      # stop command anywhere in the process list.
      if not is_running_regex.search(remote_output):
        break
      time.sleep(0.3)
    boxes_shut_down += 1

  if boxes_shut_down != len(all_ips):
    raise AppScaleException(
      "Couldn't terminate your AppScale deployment on"
      " all machines - please do so manually.")

  AppScaleLogger.log(
    "Terminated AppScale on {0} machines.".format(boxes_shut_down))
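# Shutting the AppControllers down happens in parallel: one thread per IP,
# then a join barrier before the ps-polling phase. A standalone sketch of that
# fan-out/join pattern, with a stub standing in for the real SSH call.
import threading
import time

def stop_remote_appcontroller(ip):
  time.sleep(0.1)  # stand-in for the real SSH round trip
  print('stopped controller on', ip)

all_ips = ['10.0.0.1', '10.0.0.2', '10.0.0.3']  # hypothetical deployment IPs

threads = []
for ip in all_ips:
  thread = threading.Thread(target=stop_remote_appcontroller, args=(ip,))
  thread.start()
  threads.append(thread)
for thread in threads:
  thread.join()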
def create_network(self, parameters):
  """ Creates a new network in Google Compute Engine with the specified name.

  Args:
    parameters: A dict with keys for each parameter needed to connect to
      Google Compute Engine, and an additional key indicating the name of the
      network that we should create in GCE.
  Returns:
    The URL corresponding to the name of the network that was created, for
    use with binding this network to one or more firewalls.
  """
  gce_service, credentials = self.open_connection(parameters)
  http = httplib2.Http()
  auth_http = credentials.authorize(http)
  request = gce_service.networks().insert(
    project=parameters[self.PARAM_PROJECT],
    body={
      "name": parameters[self.PARAM_GROUP],
      "description": "Network used for AppScale instances",
      "IPv4Range": "10.240.0.0/16"
    })
  response = request.execute(http=auth_http)
  AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])
  self.ensure_operation_succeeds(gce_service, auth_http, response,
    parameters[self.PARAM_PROJECT])
  return response['targetLink']
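# Every GCE call in this agent follows the same request pattern: build a
# Compute service handle, wrap an httplib2.Http object with the OAuth2
# credentials, then execute the request with that authorized client. A
# standalone sketch of the pattern, assuming the google-api-python-client and
# oauth2client libraries this code is built on, application-default
# credentials configured locally, and a hypothetical project name.
import httplib2
from apiclient import discovery
from oauth2client.client import GoogleCredentials

credentials = GoogleCredentials.get_application_default()
gce_service = discovery.build('compute', 'v1')

auth_http = credentials.authorize(httplib2.Http())
request = gce_service.networks().list(project='my-project')
response = request.execute(http=auth_http)
for network in response.get('items', []):
  print(network['name'])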
def terminate_instances(self, parameters):
  """ Deletes the instances specified in 'parameters' running in Google
  Compute Engine.

  Args:
    parameters: A dict with keys for each parameter needed to connect to
      Google Compute Engine, and an additional key mapping to a list of
      instance names that should be deleted.
  """
  instance_ids = parameters[self.PARAM_INSTANCE_IDS]
  responses = []
  for instance_id in instance_ids:
    gce_service, credentials = self.open_connection(parameters)
    http = httplib2.Http()
    auth_http = credentials.authorize(http)
    request = gce_service.instances().delete(
      project=parameters[self.PARAM_PROJECT],
      zone=parameters[self.PARAM_ZONE],
      instance=instance_id)
    response = request.execute(http=auth_http)
    AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])
    responses.append(response)

  for response in responses:
    gce_service, credentials = self.open_connection(parameters)
    http = httplib2.Http()
    auth_http = credentials.authorize(http)
    self.ensure_operation_succeeds(gce_service, auth_http, response,
      parameters[self.PARAM_PROJECT])
def does_address_exist(self, parameters):
  """ Queries Google Compute Engine to see if the specified static IP address
  exists for this user.

  Args:
    parameters: A dict with keys for each parameter needed to connect to
      Google Compute Engine, and an additional key indicating the name of the
      static IP address that we should check for existence.
  Returns:
    True if the named address exists, and False otherwise.
  """
  gce_service, credentials = self.open_connection(parameters)
  http = httplib2.Http()
  auth_http = credentials.authorize(http)
  request = gce_service.addresses().list(
    project=parameters[self.PARAM_PROJECT],
    filter="address eq {0}".format(parameters[self.PARAM_STATIC_IP]),
    region=parameters[self.PARAM_REGION])
  response = request.execute(http=auth_http)
  AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])
  return 'items' in response
def does_disk_exist(self, parameters, disk):
  """ Queries Google Compute Engine to see if the specified persistent disk
  exists for this user.

  Args:
    parameters: A dict with keys for each parameter needed to connect to
      Google Compute Engine.
    disk: A str containing the name of the disk that we should check for
      existence.
  Returns:
    True if the named persistent disk exists, and False otherwise.
  """
  gce_service, credentials = self.open_connection(parameters)
  try:
    http = httplib2.Http()
    auth_http = credentials.authorize(http)
    request = gce_service.disks().get(
      project=parameters[self.PARAM_PROJECT],
      disk=disk,
      zone=parameters[self.PARAM_ZONE])
    response = request.execute(http=auth_http)
    AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])
    return True
  except apiclient.errors.HttpError:
    return False
def add_access_config(self, parameters, instance_id, static_ip):
  """ Instructs Google Compute Engine to use the given IP address as the
  public IP for the named instance.

  This assumes that there is no existing public IP address for the named
  instance. If this is not the case, callers should use delete_access_config
  first to remove it.

  Args:
    parameters: A dict with keys for each parameter needed to connect to
      Google Compute Engine.
    instance_id: A str naming the running instance that the new public IP
      address should be added to.
    static_ip: A str naming the already allocated static IP address that will
      be used for the named instance.
  """
  gce_service, credentials = self.open_connection(parameters)
  http = httplib2.Http()
  auth_http = credentials.authorize(http)
  request = gce_service.instances().addAccessConfig(
    project=parameters[self.PARAM_PROJECT],
    instance=instance_id,
    networkInterface="nic0",
    zone=parameters[self.PARAM_ZONE],
    body={
      "kind": "compute#accessConfig",
      "type": "ONE_TO_ONE_NAT",
      "name": "External NAT",
      "natIP": static_ip
    })
  response = request.execute(http=auth_http)
  AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])
def create_firewall(self, parameters, network_url):
  """ Creates a new firewall in Google Compute Engine with the specified
  name, bound to the specified network.

  Args:
    parameters: A dict with keys for each parameter needed to connect to
      Google Compute Engine, and an additional key indicating the name of the
      firewall that we should create.
    network_url: A str containing the URL of the network that this new
      firewall should be applied to.
  """
  gce_service, credentials = self.open_connection(parameters)
  http = httplib2.Http()
  auth_http = credentials.authorize(http)
  request = gce_service.firewalls().insert(
    project=parameters[self.PARAM_PROJECT],
    body={
      "name": parameters[self.PARAM_GROUP],
      "description": "Firewall used for AppScale instances",
      "network": network_url,
      "sourceRanges": ["0.0.0.0/0"],
      "allowed": [
        {"IPProtocol": "tcp", "ports": ["1-65535"]},
        {"IPProtocol": "udp", "ports": ["1-65535"]},
        {"IPProtocol": "icmp"}
      ]
    })
  response = request.execute(http=auth_http)
  AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])
  self.ensure_operation_succeeds(gce_service, auth_http, response,
    parameters[self.PARAM_PROJECT])
def copy_app_to_host(cls, app_location, app_id, keyname, is_verbose,
                     extras=None, custom_service_yaml=None):
  """Copies the given application to a machine running the Login service
  within an AppScale deployment.

  Args:
    app_location: The location on the local filesystem where the application
      can be found.
    app_id: The project to use for this application.
    keyname: The name of the SSH keypair that uniquely identifies this
      AppScale deployment.
    is_verbose: A bool that indicates if we should print the commands we exec
      to copy the app to the remote host to stdout.
    extras: A dictionary containing a list of files to include in the upload.
    custom_service_yaml: A string specifying the location of the service yaml
      being deployed.
  Returns:
    A str corresponding to the location on the remote filesystem where the
    application was copied to.
  """
  AppScaleLogger.log("Tarring application")
  rand = str(uuid.uuid4()).replace('-', '')[:8]
  local_tarred_app = "{0}/appscale-app-{1}-{2}.tar.gz".format(
    tempfile.gettempdir(), app_id, rand)

  # Collect list of files that should be included in the tarball.
  app_files = {}
  for root, _, filenames in os.walk(app_location, followlinks=True):
    relative_dir = os.path.relpath(root, app_location)
    for filename in filenames:
      # Ignore compiled Python files.
      if filename.endswith('.pyc'):
        continue
      relative_path = os.path.join(relative_dir, filename)
      app_files[relative_path] = os.path.join(root, filename)

  if extras is not None:
    app_files.update(extras)

  with tarfile.open(local_tarred_app, 'w:gz') as app_tar:
    for tarball_path, local_path in app_files.items():
      # Replace app.yaml with the service yaml being deployed.
      if custom_service_yaml and os.path.normpath(tarball_path) == 'app.yaml':
        continue
      app_tar.add(local_path, tarball_path)

    if custom_service_yaml:
      app_tar.add(custom_service_yaml, 'app.yaml')

  AppScaleLogger.log("Copying over application")
  remote_app_tar = "{0}/{1}.tar.gz".format(cls.REMOTE_APP_DIR, app_id)
  cls.scp(LocalState.get_login_host(keyname), keyname, local_tarred_app,
    remote_app_tar, is_verbose)

  AppScaleLogger.verbose("Removing local copy of tarred application",
    is_verbose)
  os.remove(local_tarred_app)
  return remote_app_tar
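# The walk above builds a map from in-archive paths to local paths before
# anything is written, which is what lets `extras` override app files and the
# service yaml replace app.yaml. A standalone sketch of that collection step,
# assuming an arbitrary source directory (paths are illustrative).
import os
import tarfile

source_dir = "/tmp/guestbook"  # hypothetical app directory

app_files = {}
for root, _, filenames in os.walk(source_dir, followlinks=True):
  relative_dir = os.path.relpath(root, source_dir)
  for filename in filenames:
    if filename.endswith('.pyc'):  # skip compiled Python files
      continue
    relative_path = os.path.join(relative_dir, filename)
    app_files[relative_path] = os.path.join(root, filename)

with tarfile.open('/tmp/app.tar.gz', 'w:gz') as tar:
  for arcname, path in app_files.items():
    tar.add(path, arcname)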
def shell(cls, command, is_verbose, num_retries=DEFAULT_NUM_RETRIES):
  """Executes a command on this machine, retrying it if it initially fails.

  Args:
    command: A str representing the command to execute.
    is_verbose: A bool that indicates if we should print the command we are
      executing to stdout.
    num_retries: The number of times we should try to execute the given
      command before aborting.
  Returns:
    The standard output and standard error produced when the command
    executes.
  Raises:
    ShellException: If, after num_retries attempts, executing the named
      command failed.
  """
  tries_left = num_retries
  while tries_left:
    AppScaleLogger.verbose("shell> {0}".format(command), is_verbose)
    the_temp_file = tempfile.TemporaryFile()
    result = subprocess.Popen(command, shell=True, stdout=the_temp_file,
      stderr=subprocess.STDOUT)
    result.wait()
    if result.returncode == 0:
      # The child process advanced the shared file offset, so rewind before
      # reading the captured output.
      the_temp_file.seek(0)
      output = the_temp_file.read()
      the_temp_file.close()
      return output
    the_temp_file.close()
    AppScaleLogger.verbose("Command failed. Trying again momentarily.",
      is_verbose)
    tries_left -= 1
    time.sleep(1)
  raise ShellException('Could not execute command: {0}'.format(command))
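# A minimal standalone version of the same retry loop: run a shell command,
# capture combined stdout/stderr in a temp file, and retry on nonzero exit.
# The command here is illustrative.
import subprocess
import tempfile
import time

def run_with_retries(command, num_retries=5):
  """Runs a shell command, retrying on failure; returns its combined output."""
  for _ in range(num_retries):
    out = tempfile.TemporaryFile()
    proc = subprocess.Popen(command, shell=True, stdout=out,
                            stderr=subprocess.STDOUT)
    proc.wait()
    out.seek(0)  # the child advanced the shared file offset
    output = out.read()
    out.close()
    if proc.returncode == 0:
      return output
    time.sleep(1)
  raise RuntimeError('Could not execute command: {0}'.format(command))

print(run_with_retries('echo hello'))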
def remove_app(cls, options):
  """Instructs AppScale to no longer host the named application.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  """
  if not options.confirm:
    response = raw_input(
      'Are you sure you want to remove this application? (y/N) ')
    if response.lower() not in ['y', 'yes']:
      raise AppScaleException("Cancelled application removal.")

  login_host = LocalState.get_login_host(options.keyname)
  secret = LocalState.get_secret_key(options.keyname)
  acc = AppControllerClient(login_host, secret)

  if not acc.is_app_running(options.appname):
    raise AppScaleException("The given application is not currently running.")

  # Makes a call to the AppController to get all the stats and looks
  # through them for the http port the app can be reached on.
  http_port = None
  for _ in range(cls.MAX_RETRIES + 1):
    result = acc.get_all_stats()
    try:
      json_result = json.loads(result)
      apps_result = json_result['apps']
      current_app = apps_result[options.appname]
      http_port = current_app['http']
      if http_port:
        break
      time.sleep(cls.SLEEP_TIME)
    except (KeyError, ValueError):
      AppScaleLogger.verbose("Got json error from get_all_stats result.",
        options.verbose)
      time.sleep(cls.SLEEP_TIME)
  if not http_port:
    raise AppScaleException(
      "Unable to get the serving port for the application.")

  acc.stop_app(options.appname)
  AppScaleLogger.log("Please wait for your app to shut down.")

  for _ in range(cls.MAX_RETRIES + 1):
    if RemoteHelper.is_port_open(login_host, http_port, options.verbose):
      time.sleep(cls.SLEEP_TIME)
      AppScaleLogger.log("Waiting for {0} to terminate...".format(
        options.appname))
    else:
      AppScaleLogger.success("Done shutting down {0}.".format(
        options.appname))
      return
  AppScaleLogger.warn("App {0} may still be running.".format(
    options.appname))
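# The port lookup above retries because the AppController may not have stats
# for the app yet, so each attempt must tolerate both malformed JSON and
# missing keys. A standalone sketch of that defensive parse, with a made-up
# stats payload.
import json
import time

raw_stats = '{"apps": {"guestbook": {"http": 8080}}}'  # hypothetical payload

http_port = None
for _ in range(3):
  try:
    stats = json.loads(raw_stats)
    http_port = stats['apps']['guestbook']['http']
    if http_port:
      break
  except (KeyError, ValueError):
    time.sleep(1)  # not ready yet: wait and ask again
print(http_port)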
def shell(cls, command, is_verbose, num_retries=DEFAULT_NUM_RETRIES,
          stdin=None):
  """Executes a command on this machine, retrying it up to num_retries times
  if it initially fails.

  Args:
    command: A str representing the command to execute.
    is_verbose: A bool that indicates if we should print the command we are
      executing to stdout.
    num_retries: The number of times we should try to execute the given
      command before aborting.
    stdin: A str that is passed as standard input to the process.
  Returns:
    A str with both the standard output and standard error produced when the
    command executes.
  Raises:
    ShellException: If, after num_retries attempts, executing the named
      command failed.
  """
  tries_left = num_retries
  try:
    while tries_left:
      AppScaleLogger.verbose("shell> {0}".format(command), is_verbose)
      the_temp_file = tempfile.NamedTemporaryFile()
      if stdin is not None:
        stdin_strio = tempfile.TemporaryFile()
        stdin_strio.write(stdin)
        stdin_strio.seek(0)
        AppScaleLogger.verbose("  stdin str: {0}".format(stdin), is_verbose)
        result = subprocess.Popen(command, shell=True, stdout=the_temp_file,
          stdin=stdin_strio, stderr=subprocess.STDOUT)
      else:
        result = subprocess.Popen(command, shell=True, stdout=the_temp_file,
          stderr=subprocess.STDOUT)
      AppScaleLogger.verbose("  stdout buffer: {0}".format(
        the_temp_file.name), is_verbose)
      result.wait()
      if stdin is not None:
        stdin_strio.close()
      if result.returncode == 0:
        the_temp_file.seek(0)
        output = the_temp_file.read()
        the_temp_file.close()
        return output
      tries_left -= 1
      if tries_left:
        the_temp_file.close()
        AppScaleLogger.verbose("Command failed. Trying again momentarily.",
          is_verbose)
      else:
        the_temp_file.seek(0)
        output = the_temp_file.read()
        the_temp_file.close()
        if stdin:
          raise ShellException("Executing command '{0} {1}' failed:\n{2}"
            .format(command, stdin, output))
        else:
          raise ShellException("Executing command '{0}' failed:\n{1}"
            .format(command, output))
      time.sleep(1)
  except OSError as os_error:
    if stdin:
      raise ShellException("Error executing command: '{0} {1}':{2}"
        .format(command, stdin, os_error))
    else:
      raise ShellException("Error executing command: '{0}':{1}"
        .format(command, os_error))
def does_ssh_key_exist(self, parameters):
  """ Queries Google Compute Engine to see if the specified SSH key exists.

  Args:
    parameters: A dict with keys for each parameter needed to connect to
      Google Compute Engine. We don't have an additional key for the name of
      the SSH key, since we use the one in ~/.ssh.
  Returns:
    A tuple of two items. The first item is a bool that is True if our public
    key's contents are in GCE, and False otherwise, while the second item is
    the contents of all SSH keys stored in GCE.
  """
  our_public_ssh_key = None
  public_ssh_key_location = LocalState.LOCAL_APPSCALE_PATH + \
    parameters[self.PARAM_KEYNAME] + ".pub"
  with open(public_ssh_key_location) as file_handle:
    our_public_ssh_key = os.getlogin() + ":" + file_handle.read().rstrip()

  gce_service, credentials = self.open_connection(parameters)
  try:
    http = httplib2.Http()
    auth_http = credentials.authorize(http)
    request = gce_service.projects().get(
      project=parameters[self.PARAM_PROJECT])
    response = request.execute(http=auth_http)
    AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])

    if 'items' not in response['commonInstanceMetadata']:
      return False, ""

    metadata = response['commonInstanceMetadata']['items']
    if not metadata:
      return False, ""

    for item in metadata:
      if item['key'] != 'sshKeys':
        continue

      # Now that we know there's one or more SSH keys, just make sure that
      # ours is in this list.
      all_ssh_keys = item['value']
      if our_public_ssh_key in all_ssh_keys:
        return True, all_ssh_keys
      return False, all_ssh_keys

    # No sshKeys entry was present in the metadata.
    return False, ""
  except apiclient.errors.HttpError:
    return False, ""
def get_uaserver_host(self, is_verbose):
  """Queries the AppController to see which machine is hosting the
  UserAppServer, and at what IP it can be reached.

  Args:
    is_verbose: A bool that indicates if we should print out the first
      AppController's status when we query it.
  Returns:
    The IP address where a UserAppServer can be located (although it is not
    guaranteed to be running).
  Raises:
    TimeoutException: If MAX_RETRIES attempts pass without an answer from the
      AppController.
  """
  last_known_state = None
  retries = 0
  while True:
    try:
      status = self.get_status()
      AppScaleLogger.verbose('Received status from head node: ' + status,
        is_verbose)

      if status == self.BAD_SECRET_MESSAGE:
        raise AppControllerException("Could not authenticate successfully" + \
          " to the AppController. You may need to change the keyname in use.")

      match = re.search(r'Database is at (.*)', status)
      if match and match.group(1) != 'not-up-yet':
        return match.group(1)
      else:
        match = re.search(r'Current State: (.*)', status)
        if match:
          if last_known_state != match.group(1):
            last_known_state = match.group(1)
            AppScaleLogger.log(last_known_state)
        else:
          AppScaleLogger.log('Waiting for AppScale nodes to complete '
            'the initialization process')
    except (AppControllerException, socket.error) as exception:
      raise exception
    except Exception as exception:
      AppScaleLogger.warn('Saw {0}, waiting a few moments to try again'
        .format(str(exception)))
    time.sleep(self.WAIT_TIME)
    retries += 1
    if retries >= self.MAX_RETRIES:
      AppScaleLogger.warn("Too many retries to connect to UAServer.")
      raise TimeoutException()
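# The AppController reports its state as free-form text, so the lookup above
# is regex scraping: first for a "Database is at <ip>" line, then for
# "Current State: ..." as a progress signal. A standalone sketch with a
# made-up status string.
import re

status = "Current State: Starting API services\nDatabase is at 10.0.0.5"

match = re.search(r'Database is at (.*)', status)
if match and match.group(1) != 'not-up-yet':
  print('UserAppServer at', match.group(1))
else:
  state = re.search(r'Current State: (.*)', status)
  if state:
    print('Still waiting:', state.group(1))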
def sleep_until_port_is_open(cls, host, port, is_verbose):
  """Queries the given host to see if the named port is open, and if not,
  waits until it is.

  Args:
    host: A str representing the host whose port we should be querying.
    port: An int representing the port that should eventually be open.
    is_verbose: A bool that indicates if we should print failure messages to
      stdout (e.g., connection refused messages that can occur when we wait
      for services to come up).
  """
  sleep_time = 1
  while not cls.is_port_open(host, port, is_verbose):
    AppScaleLogger.verbose("Waiting for {0}:{1} to open".format(host, port),
      is_verbose)
    time.sleep(sleep_time)
    sleep_time = min(sleep_time * 2, 20)
def unmount_persistent_disk(cls, host, keyname, is_verbose):
  """Unmounts the persistent disk that was previously mounted on the named
  machine.

  Args:
    host: A str that names the IP address or FQDN where the machine whose
      disk needs to be unmounted can be found.
    keyname: The name of the SSH keypair used for this AppScale deployment.
    is_verbose: A bool that indicates if we should print the commands
      executed to stdout.
  """
  try:
    remote_output = cls.ssh(host, keyname, 'umount {0}'.format(
      cls.PERSISTENT_MOUNT_POINT), is_verbose)
    AppScaleLogger.verbose(remote_output, is_verbose)
  except ShellException:
    pass
def is_port_open(cls, host, port, is_verbose):
  """Queries the given host to see if the named port is open.

  Args:
    host: A str representing the host whose port we should be querying.
    port: An int representing the port that should eventually be open.
    is_verbose: A bool that indicates if we should print failure messages to
      stdout (e.g., connection refused messages that can occur when we wait
      for services to come up).
  Returns:
    True if the port is open, False otherwise.
  """
  try:
    sock = socket.socket()
    sock.connect((host, port))
    sock.close()
    return True
  except Exception as exception:
    AppScaleLogger.verbose(str(exception), is_verbose)
    return False
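# A standalone equivalent of this probe. socket.create_connection adds a
# timeout, so a filtered port fails fast instead of hanging for the OS
# default; the host and port below are illustrative.
import socket

def port_is_open(host, port, timeout=5):
  """Returns True if a TCP connection to host:port succeeds within timeout."""
  try:
    sock = socket.create_connection((host, port), timeout=timeout)
    sock.close()
    return True
  except (socket.error, socket.timeout):
    return False

print(port_is_open('localhost', 22))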
def delete_firewall(self, parameters):
  """ Deletes a firewall in Google Compute Engine with the specified name.

  Callers should not invoke this method until they are certain that no
  instances are using the specified firewall, or this method will fail.

  Args:
    parameters: A dict with keys for each parameter needed to connect to
      Google Compute Engine, and an additional key indicating the name of the
      firewall that we should delete.
  """
  gce_service, credentials = self.open_connection(parameters)
  http = httplib2.Http()
  auth_http = credentials.authorize(http)
  request = gce_service.firewalls().delete(
    project=parameters[self.PARAM_PROJECT],
    firewall=parameters[self.PARAM_GROUP])
  response = request.execute(http=auth_http)
  AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])
  self.ensure_operation_succeeds(gce_service, auth_http, response,
    parameters[self.PARAM_PROJECT])
def delete_access_config(self, parameters, instance_id):
  """ Instructs Google Compute Engine to remove the public IP address from
  the named instance.

  Args:
    parameters: A dict with keys for each parameter needed to connect to
      Google Compute Engine.
    instance_id: A str naming the running instance that the public IP address
      should be removed from.
  """
  gce_service, credentials = self.open_connection(parameters)
  http = httplib2.Http()
  auth_http = credentials.authorize(http)
  request = gce_service.instances().deleteAccessConfig(
    project=parameters[self.PARAM_PROJECT],
    accessConfig="External NAT",
    instance=instance_id,
    networkInterface="nic0",
    zone=parameters[self.PARAM_ZONE])
  response = request.execute(http=auth_http)
  AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])
def describe_instances(self, parameters):
  """ Queries Google Compute Engine to see which instances are currently
  running, and retrieve information about their public and private IPs.

  Args:
    parameters: A dict with keys for each parameter needed to connect to
      Google Compute Engine.
  Returns:
    A tuple of the form (public_ips, private_ips, instance_ids), where each
    member is a list. Items correspond to each other across these lists, so a
    caller is guaranteed that item X in each list belongs to the same virtual
    machine.
  """
  gce_service, credentials = self.open_connection(parameters)
  http = httplib2.Http()
  auth_http = credentials.authorize(http)
  request = gce_service.instances().list(
    project=parameters[self.PARAM_PROJECT],
    filter="name eq appscale-{0}-.*".format(parameters[self.PARAM_GROUP]),
    zone=parameters[self.PARAM_ZONE])
  response = request.execute(http=auth_http)
  AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])

  instance_ids = []
  public_ips = []
  private_ips = []

  if response and 'items' in response:
    instances = response['items']
    for instance in instances:
      if instance['status'] == "RUNNING":
        instance_ids.append(instance['name'])
        network_interface = instance['networkInterfaces'][0]
        public_ips.append(network_interface['accessConfigs'][0]['natIP'])
        private_ips.append(network_interface['networkIP'])

  return public_ips, private_ips, instance_ids
def does_image_exist(self, parameters):
  """ Queries Google Compute Engine to see if the specified image exists for
  this user.

  Args:
    parameters: A dict with keys for each parameter needed to connect to
      Google Compute Engine, and an additional key indicating the name of the
      image that we should check for existence.
  Returns:
    True if the named image exists, and False otherwise.
  """
  gce_service, credentials = self.open_connection(parameters)
  try:
    http = httplib2.Http()
    auth_http = credentials.authorize(http)
    request = gce_service.images().get(
      project=parameters[self.PARAM_PROJECT],
      image=parameters[self.PARAM_IMAGE_ID])
    response = request.execute(http=auth_http)
    AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])
    return True
  except apiclient.errors.HttpError:
    return False
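# Each of these existence checks uses the same idiom: a GET for the named
# resource either succeeds or raises apiclient.errors.HttpError (typically a
# 404), which gets translated into a bool. A sketch of the idiom, assuming an
# unsent apiclient request such as
# gce_service.images().get(project='my-project', image='my-image').
import apiclient.errors

def resource_exists(get_request, auth_http):
  """Returns True if executing the GET succeeds, False on an HTTP error."""
  try:
    get_request.execute(http=auth_http)
    return True
  except apiclient.errors.HttpError:
    return False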
def assert_credentials_are_valid(self, parameters):
  """Contacts GCE to see if the given credentials are valid.

  Args:
    parameters: A dict containing the credentials necessary to interact with
      GCE.
  Raises:
    AgentConfigurationException: If the given GCE credentials are invalid.
  """
  gce_service, credentials = self.open_connection(parameters)
  try:
    http = httplib2.Http()
    auth_http = credentials.authorize(http)
    request = gce_service.instances().list(
      project=parameters[self.PARAM_PROJECT],
      zone=parameters[self.PARAM_ZONE])
    response = request.execute(http=auth_http)
    AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])
    return True
  except apiclient.errors.HttpError:
    raise AgentConfigurationException("We couldn't validate your GCE "
      "credentials. Are your credentials valid?")
def create_ssh_key(self, parameters, all_ssh_keys):
  """ Creates a new SSH key in Google Compute Engine with the contents of our
  newly generated public key.

  Args:
    parameters: A dict with keys for each parameter needed to connect to
      Google Compute Engine.
    all_ssh_keys: A str that contains all of the SSH keys that are currently
      passed in to GCE instances.
  """
  our_public_ssh_key = None
  public_ssh_key_location = LocalState.LOCAL_APPSCALE_PATH + \
    parameters[self.PARAM_KEYNAME] + ".pub"
  with open(public_ssh_key_location) as file_handle:
    our_public_ssh_key = os.getlogin() + ":" + file_handle.read().rstrip()

  if all_ssh_keys:
    new_all_ssh_keys = our_public_ssh_key + "\n" + all_ssh_keys
  else:
    new_all_ssh_keys = our_public_ssh_key

  gce_service, credentials = self.open_connection(parameters)
  http = httplib2.Http()
  auth_http = credentials.authorize(http)
  request = gce_service.projects().setCommonInstanceMetadata(
    project=parameters[self.PARAM_PROJECT],
    body={
      "kind": "compute#metadata",
      "items": [{
        "key": "sshKeys",
        "value": new_all_ssh_keys
      }]
    })
  response = request.execute(http=auth_http)
  AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])
  self.ensure_operation_succeeds(gce_service, auth_http, response,
    parameters[self.PARAM_PROJECT])
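# GCE's legacy sshKeys project metadata is a single newline-separated string
# of "username:public-key" entries, so adding a key means prepending to that
# string. A sketch of the value construction with hypothetical key material;
# note os.getlogin() assumes a controlling terminal.
import os

existing_keys = "alice:ssh-rsa AAAA... alice@host"  # hypothetical metadata
our_key = "ssh-rsa BBBB... appscale@host"           # hypothetical public key

entry = os.getlogin() + ":" + our_key.rstrip()
new_value = entry + "\n" + existing_keys if existing_keys else entry
print(new_value)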
def sleep_until_port_is_open(cls, host, port, is_verbose):
  """Queries the given host to see if the named port is open, and if not,
  waits until it is.

  Args:
    host: A str representing the host whose port we should be querying.
    port: An int representing the port that should eventually be open.
    is_verbose: A bool that indicates if we should print failure messages to
      stdout (e.g., connection refused messages that can occur when we wait
      for services to come up).
  Raises:
    TimeoutException: If the port does not open in a certain amount of time.
  """
  time_left = cls.MAX_WAIT_TIME
  while time_left > 0:
    if cls.is_port_open(host, port, is_verbose):
      return
    AppScaleLogger.verbose("Waiting {2} second(s) for {0}:{1} to open".format(
      host, port, cls.WAIT_TIME), is_verbose)
    time.sleep(cls.WAIT_TIME)
    time_left -= cls.WAIT_TIME

  raise TimeoutException("Port {}:{} did not open in time. "
    "Aborting...".format(host, port))
def does_firewall_exist(self, parameters):
  """ Queries Google Compute Engine to see if the specified firewall exists.

  Args:
    parameters: A dict with keys for each parameter needed to connect to
      Google Compute Engine, and an additional key indicating the name of the
      firewall that we should query for existence in GCE.
  Returns:
    True if the named firewall exists, and False otherwise.
  """
  gce_service, credentials = self.open_connection(parameters)
  try:
    http = httplib2.Http()
    auth_http = credentials.authorize(http)
    request = gce_service.firewalls().get(
      project=parameters[self.PARAM_PROJECT],
      firewall=parameters[self.PARAM_GROUP])
    response = request.execute(http=auth_http)
    AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])
    return True
  except apiclient.errors.HttpError:
    return False
def create_scratch_disk(self, parameters):
  """ Creates a disk from a given machine image.

  GCE does not support scratch disks on API version v1 and higher. We create
  a persistent disk upon creation to act like one to keep the abstraction
  used in other infrastructures.

  Args:
    parameters: A dict with keys for each parameter needed to connect to
      Google Compute Engine.
  Returns:
    A str, the url to the disk to use.
  """
  gce_service, credentials = self.open_connection(parameters)
  http = httplib2.Http()
  auth_http = credentials.authorize(http)
  disk_name = self.generate_disk_name(parameters)
  project_url = '{0}{1}'.format(self.GCE_URL,
    parameters[self.PARAM_PROJECT])
  source_image_url = '{0}{1}/global/images/{2}'.format(self.GCE_URL,
    parameters[self.PARAM_PROJECT], parameters[self.PARAM_IMAGE_ID])
  request = gce_service.disks().insert(
    project=parameters[self.PARAM_PROJECT],
    zone=parameters[self.PARAM_ZONE],
    body={
      'name': disk_name
    },
    sourceImage=source_image_url)
  response = request.execute(http=auth_http)
  AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])
  self.ensure_operation_succeeds(gce_service, auth_http, response,
    parameters[self.PARAM_PROJECT])

  disk_url = "{0}/zones/{1}/disks/{2}".format(
    project_url, parameters[self.PARAM_ZONE], disk_name)
  return disk_url
def detach_disk(self, parameters, disk_name, instance_id):
  """ Detaches the persistent disk specified in 'disk_name' from the named
  instance.

  Args:
    parameters: A dict with keys for each parameter needed to connect to
      Google Compute Engine.
    disk_name: A str naming the persistent disk to detach.
    instance_id: A str naming the id of the instance that the disk should be
      detached from.
  """
  gce_service, credentials = self.open_connection(parameters)
  http = httplib2.Http()
  auth_http = credentials.authorize(http)
  project_id = parameters[self.PARAM_PROJECT]
  request = gce_service.instances().detachDisk(
    project=project_id,
    zone=parameters[self.PARAM_ZONE],
    instance=instance_id,
    deviceName='sdb')
  response = request.execute(http=auth_http)
  AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])
  self.ensure_operation_succeeds(gce_service, auth_http, response,
    project_id)
def run_instances(self, count, parameters, security_configured):
  """ Starts 'count' instances in Google Compute Engine, and returns once
  they have been started.

  Callers should create a network and attach a firewall to it before using
  this method, or the newly created instances will not have a network and
  firewall to attach to (and thus this method will fail).

  Args:
    count: An int that specifies how many virtual machines should be started.
    parameters: A dict with keys for each parameter needed to connect to
      Google Compute Engine.
    security_configured: Unused, as we assume that the network and firewall
      have already been set up.
  Returns:
    A tuple of the form (instance_ids, public_ips, private_ips).
  """
  project_id = parameters[self.PARAM_PROJECT]
  image_id = parameters[self.PARAM_IMAGE_ID]
  instance_type = parameters[self.PARAM_INSTANCE_TYPE]
  keyname = parameters[self.PARAM_KEYNAME]
  group = parameters[self.PARAM_GROUP]
  zone = parameters[self.PARAM_ZONE]

  AppScaleLogger.log("Starting {0} machines with machine id {1}, with "
    "instance type {2}, keyname {3}, in security group {4}, in zone {5}"
    .format(count, image_id, instance_type, keyname, group, zone))

  # First, see how many instances are running and what their info is.
  start_time = datetime.datetime.now()
  active_public_ips, active_private_ips, active_instances = \
    self.describe_instances(parameters)

  # Construct URLs
  image_url = '{0}{1}/global/images/{2}'.format(self.GCE_URL, project_id,
    image_id)
  project_url = '{0}{1}'.format(self.GCE_URL, project_id)
  machine_type_url = '{0}/zones/{1}/machineTypes/{2}'.format(project_url,
    zone, instance_type)
  network_url = '{0}/global/networks/{1}'.format(project_url, group)

  # Construct the request body
  for index in range(count):
    instances = {
      # Truncate the name down to the first 62 characters, since GCE doesn't
      # let us use arbitrarily long instance names.
      'name': "appscale-{0}-{1}".format(group, uuid.uuid4())[:62],
      'machineType': machine_type_url,
      'image': image_url,
      'networkInterfaces': [{
        'accessConfigs': [{
          'type': 'ONE_TO_ONE_NAT',
          'name': 'External NAT'
        }],
        'network': network_url
      }],
      'serviceAccounts': [{
        'email': self.DEFAULT_SERVICE_EMAIL,
        'scopes': [self.GCE_SCOPE]
      }]
    }

    # Create the instance
    gce_service, credentials = self.open_connection(parameters)
    http = httplib2.Http()
    auth_http = credentials.authorize(http)
    request = gce_service.instances().insert(project=project_id,
      body=instances, zone=zone)
    response = request.execute(http=auth_http)
    AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])
    self.ensure_operation_succeeds(gce_service, auth_http, response,
      parameters[self.PARAM_PROJECT])

  instance_ids = []
  public_ips = []
  private_ips = []
  end_time = datetime.datetime.now() + datetime.timedelta(0,
    self.MAX_VM_CREATION_TIME)
  now = datetime.datetime.now()

  while now < end_time:
    AppScaleLogger.log("Waiting for your instances to start...")
    instance_info = self.describe_instances(parameters)
    public_ips = instance_info[0]
    private_ips = instance_info[1]
    instance_ids = instance_info[2]
    public_ips = self.diff(public_ips, active_public_ips)
    private_ips = self.diff(private_ips, active_private_ips)
    instance_ids = self.diff(instance_ids, active_instances)
    if count == len(public_ips):
      break
    time.sleep(self.SLEEP_TIME)
    now = datetime.datetime.now()

  if not public_ips:
    self.handle_failure('No public IPs were able to be procured '
      'within the time limit')

  if len(public_ips) != count:
    for index in range(0, len(public_ips)):
      if public_ips[index] == '0.0.0.0':
        instance_to_term = instance_ids[index]
        AppScaleLogger.log('Instance {0} failed to get a public IP address '
          'and is being terminated'.format(instance_to_term))
        # terminate_instances expects the instance ids to be passed in via
        # the parameters dict, not as a bare list.
        terminate_params = parameters.copy()
        terminate_params[self.PARAM_INSTANCE_IDS] = [instance_to_term]
        self.terminate_instances(terminate_params)

  end_time = datetime.datetime.now()
  total_time = end_time - start_time
  AppScaleLogger.log("Started {0} on-demand instances in {1} seconds"
    .format(count, total_time.seconds))
  return instance_ids, public_ips, private_ips
def start_head_node(cls, options, my_id, node_layout):
  """Starts the first node in an AppScale deployment and instructs it to
  start API services on its own node, as well as the other nodes in the
  deployment.

  This includes spawning the first node in the deployment, copying over all
  deployment-specific files to it, and starting its AppController service.

  Args:
    options: A Namespace that includes parameters passed in by the user that
      define non-placement-strategy-related deployment options (e.g., keypair
      names, security group names).
    my_id: A str that is used to uniquely identify this AppScale deployment
      with the remote start application.
    node_layout: A NodeLayout that describes the placement strategy that
      should be used for this AppScale deployment.
  Returns:
    The public IP and instance ID (a dummy value in non-cloud deployments)
    corresponding to the node that was started.
  """
  secret_key = LocalState.generate_secret_key(options.keyname)
  AppScaleLogger.verbose("Secret key is {0}".format(secret_key),
    options.verbose)

  if options.infrastructure:
    instance_id, public_ip, private_ip = cls.spawn_node_in_cloud(options)
  else:
    instance_id = cls.DUMMY_INSTANCE_ID
    public_ip = node_layout.head_node().public_ip
    private_ip = node_layout.head_node().private_ip

  AppScaleLogger.log("Log in to your head node: ssh -i {0} root@{1}".format(
    LocalState.get_key_path_from_name(options.keyname), public_ip))

  try:
    cls.ensure_machine_is_compatible(public_ip, options.keyname,
      options.table, options.verbose)
  except AppScaleException as ase:
    # On failure shutdown the cloud instances, cleanup the keys, but only
    # if --test is not set.
    if options.infrastructure:
      if not options.test:
        try:
          cls.terminate_cloud_instance(instance_id, options)
        except Exception as tcie:
          AppScaleLogger.log("Error terminating instances: {0}"
            .format(str(tcie)))
      raise AppScaleException("{0} Please ensure that the "
        "image {1} has AppScale {2} installed on it."
        .format(str(ase), options.machine, APPSCALE_VERSION))
    else:
      raise AppScaleException("{0} Please log in to that machine and ensure "
        "that AppScale {1} is installed on it."
        .format(str(ase), APPSCALE_VERSION))

  if options.scp:
    AppScaleLogger.log("Copying over local copy of AppScale from {0}".format(
      options.scp))
    cls.rsync_files(public_ip, options.keyname, options.scp, options.verbose)

  if options.infrastructure:
    agent = InfrastructureAgentFactory.create_agent(options.infrastructure)
    params = agent.get_params_from_args(options)
    additional_params = params[agent.PARAM_CREDENTIALS]
    if options.use_spot_instances:
      additional_params[agent.PARAM_SPOT_PRICE] = \
        str(params[agent.PARAM_SPOT_PRICE])
  else:
    additional_params = {}

  deployment_params = LocalState.generate_deployment_params(options,
    node_layout, public_ip, additional_params)
  AppScaleLogger.verbose(str(LocalState.obscure_dict(deployment_params)),
    options.verbose)
  AppScaleLogger.log("Head node successfully initialized at {0}. It is now "
    "starting up {1}.".format(public_ip, options.table))

  AppScaleLogger.remote_log_tools_state(options, my_id, "started head node",
    APPSCALE_VERSION)
  time.sleep(10)  # gives machines in cloud extra time to boot up

  cls.copy_deployment_credentials(public_ip, options)
  cls.start_remote_appcontroller(public_ip, options.keyname, options.verbose)

  acc = AppControllerClient(public_ip, secret_key)
  locations = ["{0}:{1}:{2}:{3}:cloud1".format(public_ip, private_ip,
    ":".join(node_layout.head_node().roles), instance_id)]
  acc.set_parameters(locations, LocalState.map_to_array(deployment_params))

  return public_ip, instance_id
def start_head_node(cls, options, my_id, node_layout):
  """Starts the first node in an AppScale deployment and instructs it to start
  API services on its own node, as well as the other nodes in the deployment.

  This includes spawning the first node in the deployment, copying over all
  deployment-specific files to it, and starting its AppController service.

  Args:
    options: A Namespace that includes parameters passed in by the user that
      define non-placement-strategy-related deployment options (e.g., keypair
      names, security group names).
    my_id: A str that is used to uniquely identify this AppScale deployment
      with the remote start application.
    node_layout: A NodeLayout that describes the placement strategy that
      should be used for this AppScale deployment.
  Returns:
    The public IP and instance ID (a dummy value in non-cloud deployments)
    corresponding to the node that was started.
  Raises:
    AppControllerException: If the AppController on the head node crashes.
      The message in this exception indicates why the crash occurred.
  """
  secret_key = LocalState.generate_secret_key(options.keyname)
  AppScaleLogger.verbose("Secret key is {0}".format(secret_key),
    options.verbose)

  if options.infrastructure:
    instance_id, public_ip, private_ip = cls.spawn_node_in_cloud(options)
  else:
    instance_id = cls.DUMMY_INSTANCE_ID
    public_ip = node_layout.head_node().public_ip
    private_ip = node_layout.head_node().private_ip

  AppScaleLogger.log("Log in to your head node: ssh -i {0} root@{1}".format(
    LocalState.get_key_path_from_name(options.keyname), public_ip))

  try:
    cls.ensure_machine_is_compatible(public_ip, options.keyname,
      options.table, options.verbose)
  except AppScaleException as ase:
    # On failure, shut down the cloud instances and clean up the keys, but
    # only if --test is not set.
    if options.infrastructure:
      if not options.test:
        try:
          cls.terminate_cloud_instance(instance_id, options)
        except Exception as tcie:
          AppScaleLogger.log("Error terminating instances: {0}"
            .format(str(tcie)))
      raise AppScaleException("{0} Please ensure that the "
        "image {1} has AppScale {2} installed on it."
        .format(str(ase), options.machine, APPSCALE_VERSION))
    else:
      raise AppScaleException("{0} Please login to that machine and ensure "
        "that AppScale {1} is installed on it."
        .format(str(ase), APPSCALE_VERSION))

  if options.scp:
    AppScaleLogger.log("Copying over local copy of AppScale from {0}".format(
      options.scp))
    cls.rsync_files(public_ip, options.keyname, options.scp, options.verbose)

  # On Euca, we've seen issues where attaching the EBS volume right after
  # the instance starts doesn't work. This sleep lets the instance fully
  # start up and get volumes attached to it correctly.
  if options.infrastructure and options.infrastructure == 'euca' and \
     options.disks:
    time.sleep(30)

  if options.infrastructure:
    agent = InfrastructureAgentFactory.create_agent(options.infrastructure)
    params = agent.get_params_from_args(options)
    additional_params = {}
    if agent.PARAM_CREDENTIALS in params:
      additional_params = params[agent.PARAM_CREDENTIALS]
    if options.use_spot_instances:
      additional_params[agent.PARAM_SPOT_PRICE] = \
        str(params[agent.PARAM_SPOT_PRICE])
    if agent.PARAM_REGION in params:
      additional_params[agent.PARAM_REGION] = params[agent.PARAM_REGION]
  else:
    additional_params = {}

  deployment_params = LocalState.generate_deployment_params(options,
    node_layout, public_ip, additional_params)
  AppScaleLogger.verbose(str(LocalState.obscure_dict(deployment_params)),
    options.verbose)

  AppScaleLogger.log("Head node successfully initialized at {0}.".format(
    public_ip))
  AppScaleLogger.remote_log_tools_state(options, my_id, "started head node",
    APPSCALE_VERSION)
  time.sleep(10)  # gives machines in cloud extra time to boot up

  cls.copy_deployment_credentials(public_ip, options)
  cls.run_user_commands(public_ip, options.user_commands, options.keyname,
    options.verbose)
  cls.start_remote_appcontroller(public_ip, options.keyname, options.verbose)

  acc = AppControllerClient(public_ip, secret_key)
  locations = [{
    'public_ip': public_ip,
    'private_ip': private_ip,
    'jobs': node_layout.head_node().roles,
    'instance_id': instance_id,
    'disk': node_layout.head_node().disk
  }]

  try:
    acc.set_parameters(locations, LocalState.map_to_array(deployment_params))
  except Exception as exception:
    AppScaleLogger.warn('Saw Exception while setting AC parameters: {0}'
      .format(str(exception)))
    message = RemoteHelper.collect_appcontroller_crashlog(public_ip,
      options.keyname, options.verbose)
    raise AppControllerException(message)

  return public_ip, instance_id
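In this revision the registration payload is structured data rather than a packed string. A minimal sketch of its shape follows; the keys come from the code above, while every value is an illustrative assumption:

# Sketch of the node-registration payload passed to acc.set_parameters.
example_locations = [{
  'public_ip': '203.0.113.10',
  'private_ip': '10.240.0.2',
  'jobs': ['shadow', 'load_balancer'],  # roles held by the head node
  'instance_id': 'i-0123abcd',          # dummy value on non-cloud deployments
  'disk': None,                         # or a persistent-disk name, if any
}]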
def run_instances(self, count, parameters, security_configured):
  """ Starts 'count' instances in Google Compute Engine, and returns once they
  have been started.

  Callers should create a network and attach a firewall to it before using
  this method, or the newly created instances will not have a network and
  firewall to attach to (and thus this method will fail).

  Args:
    count: An int that specifies how many virtual machines should be started.
    parameters: A dict with keys for each parameter needed to connect to
      Google Compute Engine.
    security_configured: Unused, as we assume that the network and firewall
      have already been set up.
  """
  project_id = parameters[self.PARAM_PROJECT]
  image_id = parameters[self.PARAM_IMAGE_ID]
  instance_type = parameters[self.PARAM_INSTANCE_TYPE]
  keyname = parameters[self.PARAM_KEYNAME]
  group = parameters[self.PARAM_GROUP]
  zone = parameters[self.PARAM_ZONE]

  AppScaleLogger.log("Starting {0} machines with machine id {1}, with "
    "instance type {2}, keyname {3}, in security group {4}, in zone {5}"
    .format(count, image_id, instance_type, keyname, group, zone))

  # First, see how many instances are running and what their info is.
  start_time = datetime.datetime.now()
  active_public_ips, active_private_ips, active_instances = \
    self.describe_instances(parameters)

  # Construct URLs.
  image_url = '{0}{1}/global/images/{2}'.format(self.GCE_URL, project_id,
    image_id)
  project_url = '{0}{1}'.format(self.GCE_URL, project_id)
  machine_type_url = '{0}/zones/{1}/machineTypes/{2}'.format(project_url,
    zone, instance_type)
  network_url = '{0}/global/networks/{1}'.format(project_url, group)

  # Construct the request body and create each instance in turn.
  for index in range(count):
    disk_url = self.create_scratch_disk(parameters)
    instances = {
      # Truncate the name down to the first 62 characters, since GCE doesn't
      # let us use arbitrarily long instance names.
      'name': "appscale-{0}-{1}".format(group, uuid.uuid4())[:62],
      'machineType': machine_type_url,
      'disks': [{
        'source': disk_url,
        'boot': 'true',
        'type': 'PERSISTENT'
      }],
      'image': image_url,
      'networkInterfaces': [{
        'accessConfigs': [{
          'type': 'ONE_TO_ONE_NAT',
          'name': 'External NAT'
        }],
        'network': network_url
      }],
      'serviceAccounts': [{
        'email': self.DEFAULT_SERVICE_EMAIL,
        'scopes': [self.GCE_SCOPE]
      }]
    }

    # Create the instance.
    gce_service, credentials = self.open_connection(parameters)
    http = httplib2.Http()
    auth_http = credentials.authorize(http)
    request = gce_service.instances().insert(project=project_id,
      body=instances, zone=zone)
    response = request.execute(http=auth_http)
    AppScaleLogger.verbose(str(response), parameters[self.PARAM_VERBOSE])
    self.ensure_operation_succeeds(gce_service, auth_http, response,
      parameters[self.PARAM_PROJECT])

  instance_ids = []
  public_ips = []
  private_ips = []
  end_time = datetime.datetime.now() + datetime.timedelta(0,
    self.MAX_VM_CREATION_TIME)
  now = datetime.datetime.now()

  while now < end_time:
    AppScaleLogger.log("Waiting for your instances to start...")
    instance_info = self.describe_instances(parameters)
    public_ips = instance_info[0]
    private_ips = instance_info[1]
    instance_ids = instance_info[2]
    public_ips = self.diff(public_ips, active_public_ips)
    private_ips = self.diff(private_ips, active_private_ips)
    instance_ids = self.diff(instance_ids, active_instances)
    if count == len(public_ips):
      break
    time.sleep(self.SLEEP_TIME)
    now = datetime.datetime.now()

  if not public_ips:
    self.handle_failure('No public IPs were able to be procured '
      'within the time limit')

  if len(public_ips) != count:
    for index in range(0, len(public_ips)):
      if public_ips[index] == '0.0.0.0':
        instance_to_term = instance_ids[index]
        AppScaleLogger.log('Instance {0} failed to get a public IP address '
          'and is being terminated'.format(instance_to_term))
        self.terminate_instances([instance_to_term])

  end_time = datetime.datetime.now()
  total_time = end_time - start_time
  AppScaleLogger.log("Started {0} on-demand instances in {1} seconds"
    .format(count, total_time.seconds))
  return instance_ids, public_ips, private_ips
def gather_logs(cls, options):
  """Collects logs from each machine in the currently running AppScale
  deployment.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  """
  # First, make sure that the place we want to store logs doesn't already
  # exist.
  if os.path.exists(options.location):
    raise AppScaleException("Can't gather logs, as the location you "
      "specified, {0}, already exists.".format(options.location))

  acc = AppControllerClient(LocalState.get_login_host(options.keyname),
    LocalState.get_secret_key(options.keyname))

  try:
    all_ips = acc.get_all_public_ips()
  except socket.error:  # Occurs when the AppController has failed.
    AppScaleLogger.warn("Couldn't get an up-to-date listing of the "
      "machines in this AppScale deployment. Using our locally cached "
      "info instead.")
    all_ips = LocalState.get_all_public_ips(options.keyname)

  # Do the mkdir after we get the secret key, so that a bad keyname will
  # cause the tool to crash and not create this directory.
  os.mkdir(options.location)

  # The log paths that we collect logs from.
  log_paths = [
    '/var/log/appscale',
    '/var/log/kern.log*',
    '/var/log/monit.log*',
    '/var/log/nginx',
    '/var/log/syslog*',
    '/var/log/zookeeper'
  ]

  failures = False
  for ip in all_ips:
    # Get the logs from each node, and store them in our local directory.
    local_dir = "{0}/{1}".format(options.location, ip)
    os.mkdir(local_dir)

    for log_path in log_paths:
      try:
        RemoteHelper.scp_remote_to_local(ip, options.keyname, log_path,
          local_dir, options.verbose)
      except ShellException as shell_exception:
        failures = True
        AppScaleLogger.warn("Unable to collect logs from '{}' "
          "for host '{}'".format(log_path, ip))
        AppScaleLogger.verbose("Encountered exception: {}".format(
          str(shell_exception)), options.verbose)

  if failures:
    AppScaleLogger.log("Done copying to {0}. There were failures while "
      "collecting AppScale logs.".format(options.location))
  else:
    AppScaleLogger.success("Successfully collected all AppScale logs into "
      "{0}".format(options.location))
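The collection loop above deliberately records failures and keeps going rather than aborting on the first bad host. A generic, stdlib-only sketch of that pattern (copy_func stands in for scp_remote_to_local; the real code catches ShellException, while this sketch uses OSError to stay self-contained):

import os

def collect(all_ips, log_paths, location, copy_func):
  """Copies each log path from each host into per-IP directories, recording
  whether anything failed so the caller can report one aggregate result."""
  failures = False
  for ip in all_ips:
    local_dir = os.path.join(location, ip)
    os.mkdir(local_dir)
    for log_path in log_paths:
      try:
        copy_func(ip, log_path, local_dir)
      except OSError:
        failures = True  # keep going; report the aggregate result later
  return failures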
def upload_app(cls, options):
  """Uploads the given App Engine application into AppScale.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  Returns:
    A tuple containing the host and port where the application is serving
    traffic from.
  """
  if cls.TAR_GZ_REGEX.search(options.file):
    file_location = LocalState.extract_tgz_app_to_dir(options.file,
      options.verbose)
    created_dir = True
  elif cls.ZIP_REGEX.search(options.file):
    file_location = LocalState.extract_zip_app_to_dir(options.file,
      options.verbose)
    created_dir = True
  elif os.path.isdir(options.file):
    file_location = options.file
    created_dir = False
  else:
    raise AppEngineConfigException('{0} is not a tar.gz file, a zip file, '
      'or a directory. Please try uploading either a tar.gz file, a zip '
      'file, or a directory.'.format(options.file))

  try:
    app_id = AppEngineHelper.get_app_id_from_app_config(file_location)
  except AppEngineConfigException as config_error:
    AppScaleLogger.log(config_error)
    if 'yaml' in str(config_error):
      raise config_error

    # Java App Engine users may have specified their war directory. In that
    # case, just move up one level, back to the app's directory.
    file_location = file_location + os.sep + ".."
    app_id = AppEngineHelper.get_app_id_from_app_config(file_location)

  app_language = AppEngineHelper.get_app_runtime_from_app_config(
    file_location)
  AppEngineHelper.validate_app_id(app_id)

  if app_language == 'java':
    if AppEngineHelper.is_sdk_mismatch(file_location):
      AppScaleLogger.warn('AppScale did not find the correct SDK jar '
        'versions in your app. The current supported SDK version is '
        + AppEngineHelper.SUPPORTED_SDK_VERSION + '.')

  login_host = LocalState.get_login_host(options.keyname)
  secret_key = LocalState.get_secret_key(options.keyname)
  acc = AppControllerClient(login_host, secret_key)

  if options.test:
    username = LocalState.DEFAULT_USER
  elif options.email:
    username = options.email
  else:
    username = LocalState.get_username_from_stdin(is_admin=False)

  if not acc.does_user_exist(username):
    password = LocalState.get_password_from_stdin()
    RemoteHelper.create_user_accounts(username, password, login_host,
      options.keyname, clear_datastore=False)

  app_exists = acc.does_app_exist(app_id)
  app_admin = acc.get_app_admin(app_id)
  if app_admin is not None and username != app_admin:
    raise AppScaleException("The given user doesn't own this application, "
      "so they can't upload an app with that application ID. Please "
      "change the application ID and try again.")

  if app_exists:
    AppScaleLogger.log("Uploading new version of app {0}".format(app_id))
  else:
    AppScaleLogger.log("Uploading initial version of app {0}".format(app_id))
    acc.reserve_app_id(username, app_id, app_language)

  # Ignore all .pyc files while tarring.
  if app_language == 'python27':
    AppScaleLogger.log("Ignoring .pyc files")

  remote_file_path = RemoteHelper.copy_app_to_host(file_location,
    options.keyname, options.verbose)
  acc.done_uploading(app_id, remote_file_path)
  acc.update([app_id])

  # Now that we've told the AppController to start our app, find out what
  # port the app is running on and wait for it to start serving.
  AppScaleLogger.log("Please wait for your app to start serving.")

  if app_exists:
    time.sleep(20)  # give the AppController time to restart the app

  # Makes a call to the AppController to get all the stats and looks
  # through them for the http port the app can be reached on.
  sleep_time = 2 * cls.SLEEP_TIME
  current_app = None
  for i in range(cls.MAX_RETRIES):
    try:
      result = acc.get_all_stats()
      json_result = json.loads(result)
      apps_result = json_result['apps']
      current_app = apps_result[app_id]
      http_port = current_app['http']
      break
    except (ValueError, KeyError):
      pass
    AppScaleLogger.verbose("Waiting {0} second(s) for a port to be "
      "assigned to {1}".format(sleep_time, app_id), options.verbose)
    time.sleep(sleep_time)

  if not current_app:
    raise AppScaleException(
      "Unable to get the serving port for the application.")

  RemoteHelper.sleep_until_port_is_open(login_host, http_port,
    options.verbose)
  AppScaleLogger.success("Your app can be reached at the following URL: "
    "http://{0}:{1}".format(login_host, http_port))

  if created_dir:
    shutil.rmtree(file_location)

  return (login_host, http_port)
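The port-discovery loop at the end of upload_app retries until the stats JSON both parses and contains the app's 'http' entry. A self-contained sketch of the same logic, with fetch_stats standing in for acc.get_all_stats():

import json
import time

def get_app_port(fetch_stats, app_id, max_retries, sleep_time):
  """Repeatedly fetches the stats JSON and returns the app's 'http' port
  once it appears. A generic sketch of the loop above, not AppScale code."""
  for _ in range(max_retries):
    try:
      stats = json.loads(fetch_stats())
      return stats['apps'][app_id]['http']
    except (ValueError, KeyError):
      time.sleep(sleep_time)  # stats not ready, or port not assigned yet
  raise RuntimeError("no serving port assigned to {0}".format(app_id))

# Usage sketch with a canned response:
print(get_app_port(lambda: '{"apps": {"guestbook": {"http": 8080}}}',
                   'guestbook', max_retries=3, sleep_time=0))  # -> 8080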
def shell(cls, command, is_verbose, num_retries=DEFAULT_NUM_RETRIES,
  stdin=None):
  """Executes a command on this machine, retrying it up to num_retries times
  if it initially fails.

  Args:
    command: A str representing the command to execute.
    is_verbose: A bool that indicates if we should print the command we are
      executing to stdout.
    num_retries: The number of times we should try to execute the given
      command before aborting.
    stdin: A str that is passed as standard input to the process.
  Returns:
    A str with both the standard output and standard error produced when the
    command executes.
  Raises:
    ShellException: If, after num_retries attempts, executing the named
      command failed.
  """
  tries_left = num_retries
  try:
    while tries_left:
      AppScaleLogger.verbose("shell> {0}".format(command), is_verbose)
      the_temp_file = tempfile.NamedTemporaryFile()
      if stdin is not None:
        stdin_strio = tempfile.TemporaryFile()
        stdin_strio.write(stdin)
        stdin_strio.seek(0)
        AppScaleLogger.verbose("  stdin str: {0}".format(stdin), is_verbose)
        result = subprocess.Popen(command, shell=True, stdout=the_temp_file,
          stdin=stdin_strio, stderr=subprocess.STDOUT)
      else:
        result = subprocess.Popen(command, shell=True, stdout=the_temp_file,
          stderr=subprocess.STDOUT)
      AppScaleLogger.verbose("  stdout buffer: {0}".format(
        the_temp_file.name), is_verbose)
      result.wait()
      if stdin is not None:
        stdin_strio.close()
      if result.returncode == 0:
        the_temp_file.seek(0)
        output = the_temp_file.read()
        the_temp_file.close()
        return output
      tries_left -= 1
      if tries_left:
        the_temp_file.close()
        AppScaleLogger.verbose("Command failed. Trying again momentarily.",
          is_verbose)
      else:
        the_temp_file.seek(0)
        output = the_temp_file.read()
        the_temp_file.close()
        if stdin:
          raise ShellException("Executing command '{0} {1}' failed:\n{2}"
            .format(command, stdin, output))
        else:
          raise ShellException("Executing command '{0}' failed:\n{1}"
            .format(command, output))
      time.sleep(1)
  except OSError as os_error:
    if stdin:
      raise ShellException("Error executing command: '{0} {1}':{2}"
        .format(command, stdin, os_error))
    else:
      raise ShellException("Error executing command: '{0}':{1}"
        .format(command, os_error))
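Given the signature above, typical calls look like the following sketch. It assumes this classmethod is defined on LocalState, which is how the rest of the tools invoke it:

# Usage sketch; both calls rely only on the parameters shown in the
# signature above.
output = LocalState.shell("ls /var/log", is_verbose=True)

# When stdin is given, it is written to a temp file and fed to the child
# process, so filters behave as expected:
sorted_output = LocalState.shell("sort", is_verbose=False, stdin="b\na\n")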
def start_head_node(cls, options, my_id, node_layout):
  """Starts the first node in an AppScale deployment and instructs it to start
  API services on its own node, as well as the other nodes in the deployment.

  This includes spawning the first node in the deployment, copying over all
  deployment-specific files to it, and starting its AppController service.

  Args:
    options: A Namespace that includes parameters passed in by the user that
      define non-placement-strategy-related deployment options (e.g., keypair
      names, security group names).
    my_id: A str that is used to uniquely identify this AppScale deployment
      with the remote start application.
    node_layout: A NodeLayout that describes the placement strategy that
      should be used for this AppScale deployment.
  Raises:
    AppControllerException: If the AppController on the head node crashes.
      The message in this exception indicates why the crash occurred.
  """
  secret_key = LocalState.generate_secret_key(options.keyname)
  AppScaleLogger.verbose("Secret key is {0}".format(secret_key),
    options.verbose)

  head_node = node_layout.head_node().public_ip
  AppScaleLogger.log("Log in to your head node: ssh -i {0} root@{1}".format(
    LocalState.get_key_path_from_name(options.keyname), head_node))

  additional_params = {}
  if options.infrastructure:
    agent = InfrastructureAgentFactory.create_agent(options.infrastructure)
    params = agent.get_params_from_args(options)
    if agent.PARAM_CREDENTIALS in params:
      additional_params = params[agent.PARAM_CREDENTIALS]
    if options.use_spot_instances:
      additional_params[agent.PARAM_SPOT_PRICE] = \
        str(params[agent.PARAM_SPOT_PRICE])
    if agent.PARAM_REGION in params:
      additional_params[agent.PARAM_REGION] = params[agent.PARAM_REGION]

  time.sleep(10)  # gives machines in cloud extra time to boot up

  cls.copy_deployment_credentials(head_node, options)
  cls.run_user_commands(head_node, options.user_commands, options.keyname,
    options.verbose)
  cls.start_remote_appcontroller(head_node, options.keyname, options.verbose)

  AppScaleLogger.log("Head node successfully initialized at {0}.".format(
    head_node))
  AppScaleLogger.remote_log_tools_state(options, my_id, "started head node",
    APPSCALE_VERSION)

  # Construct serverside compatible parameters.
  deployment_params = LocalState.generate_deployment_params(options,
    node_layout, additional_params)
  AppScaleLogger.verbose(str(LocalState.obscure_dict(deployment_params)),
    options.verbose)

  acc = AppControllerClient(head_node, secret_key)
  try:
    acc.set_parameters(node_layout.to_list(), deployment_params)
  except Exception as exception:
    AppScaleLogger.warn('Saw Exception while setting AC parameters: {0}'
      .format(str(exception)))
    message = RemoteHelper.collect_appcontroller_crashlog(head_node,
      options.keyname, options.verbose)
    raise AppControllerException(message)
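All three revisions of start_head_node end on the same error-handling idea: if the remote call fails, fetch the head node's crash log and surface it as the error, rather than raising a bare RPC failure. The pattern in isolation, as a stand-alone sketch with both collaborators passed in as callables:

def call_with_crash_report(remote_call, collect_crashlog):
  """Generic sketch of the try/set_parameters/crashlog pattern above; both
  arguments are callables supplied by the caller, not AppScale APIs."""
  try:
    return remote_call()
  except Exception as exception:
    raise RuntimeError("remote call failed ({0}); crash log:\n{1}".format(
      exception, collect_crashlog()))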
def run_instances(cls, options):
  """Starts a new AppScale deployment with the parameters given.

  Args:
    options: A Namespace that has fields for each parameter that can be
      passed in via the command-line interface.
  Raises:
    AppControllerException: If the AppController on the head node crashes.
      When this occurs, the message in the exception contains the reason why
      the AppController crashed.
    BadConfigurationException: If the user passes in options that are not
      sufficient to start an AppScale deployment (e.g., running on EC2 but
      not specifying the AMI to use), or if the user provides us
      contradictory options (e.g., running on EC2 but not specifying EC2
      credentials).
  """
  LocalState.make_appscale_directory()
  LocalState.ensure_appscale_isnt_running(options.keyname, options.force)

  if options.infrastructure:
    if not options.disks and not options.test and not options.force:
      LocalState.ensure_user_wants_to_run_without_disks()

  reduced_version = '.'.join(x for x in APPSCALE_VERSION.split('.')[:2])
  AppScaleLogger.log("Starting AppScale " + reduced_version)

  my_id = str(uuid.uuid4())
  AppScaleLogger.remote_log_tools_state(options, my_id, "started",
    APPSCALE_VERSION)

  node_layout = NodeLayout(options)
  if not node_layout.is_valid():
    raise BadConfigurationException("There were errors with your "
      "placement strategy:\n{0}".format(str(node_layout.errors())))

  head_node = node_layout.head_node()

  # Start VMs in the cloud via the cloud agent.
  if options.infrastructure:
    instance_ids, public_ips, private_ips = RemoteHelper.start_all_nodes(
      options, len(node_layout.nodes))
    AppScaleLogger.log("\nPlease wait for AppScale to prepare your "
      "machines for use. This can take a few minutes.")

    # Set newly obtained node layout info for this deployment.
    for i, _ in enumerate(instance_ids):
      node_layout.nodes[i].public_ip = public_ips[i]
      node_layout.nodes[i].private_ip = private_ips[i]
      node_layout.nodes[i].instance_id = instance_ids[i]

    # Enables root logins and SSH access on the head node.
    RemoteHelper.enable_root_ssh(options, head_node.public_ip)

  AppScaleLogger.verbose("Node Layout: {}".format(node_layout.to_list()),
    options.verbose)

  # Ensure all nodes are compatible.
  RemoteHelper.ensure_machine_is_compatible(head_node.public_ip,
    options.keyname, options.verbose)

  # Use rsync to move custom code into the deployment.
  if options.scp:
    AppScaleLogger.log("Copying over local copy of AppScale from {0}".format(
      options.scp))
    RemoteHelper.rsync_files(head_node.public_ip, options.keyname,
      options.scp, options.verbose)

  # Start services on the head node.
  RemoteHelper.start_head_node(options, my_id, node_layout)

  # Write deployment metadata to disk (facilitates SSH operations, etc.).
  db_master = node_layout.db_master().private_ip
  head_node = node_layout.head_node().public_ip
  LocalState.update_local_metadata(options, db_master, head_node)

  # Copy the locations.json to the head node.
  RemoteHelper.copy_local_metadata(node_layout.head_node().public_ip,
    options.keyname, options.verbose)

  # Wait for services on the head node to start.
  secret_key = LocalState.get_secret_key(options.keyname)
  acc = AppControllerClient(head_node, secret_key)
  try:
    while not acc.is_initialized():
      AppScaleLogger.log('Waiting for head node to initialize...')
      # This can take some time, in particular the first time around, since
      # we will have to initialize the database.
      time.sleep(cls.SLEEP_TIME * 3)
  except socket.error as socket_error:
    AppScaleLogger.warn('Unable to initialize AppController: {}'.format(
      socket_error.message))
    message = RemoteHelper.collect_appcontroller_crashlog(head_node,
      options.keyname, options.verbose)
    raise AppControllerException(message)

  # Set up the admin account.
  try:
    # We don't need to have any exception information here: we do expect
    # some anyway while the UserAppServer is coming up.
    acc.does_user_exist("non-existent-user", True)
  except Exception:
    AppScaleLogger.log('UserAppServer not ready yet. Retrying ...')
    time.sleep(cls.SLEEP_TIME)

  if options.admin_user and options.admin_pass:
    AppScaleLogger.log("Using the provided admin username/password")
    username, password = options.admin_user, options.admin_pass
  elif options.test:
    AppScaleLogger.log("Using default admin username/password")
    username, password = LocalState.DEFAULT_USER, LocalState.DEFAULT_PASSWORD
  else:
    username, password = LocalState.get_credentials()

  RemoteHelper.create_user_accounts(username, password, head_node,
    options.keyname)
  acc.set_admin_role(username, 'true', cls.ADMIN_CAPABILITIES)

  # Wait for machines to finish loading and for the AppScale Dashboard to be
  # deployed.
  RemoteHelper.wait_for_machines_to_finish_loading(head_node, options.keyname)
  RemoteHelper.sleep_until_port_is_open(
    LocalState.get_login_host(options.keyname),
    RemoteHelper.APP_DASHBOARD_PORT, options.verbose)

  AppScaleLogger.success("AppScale successfully started!")
  AppScaleLogger.success("View status information about your AppScale "
    "deployment at http://{0}:{1}".format(
      LocalState.get_login_host(options.keyname),
      RemoteHelper.APP_DASHBOARD_PORT))
  AppScaleLogger.remote_log_tools_state(options, my_id, "finished",
    APPSCALE_VERSION)
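One small detail in run_instances worth spelling out: the startup banner shows only the major.minor part of the version string. A worked example (the version value is illustrative):

APPSCALE_VERSION = '2.3.1'  # illustrative value
reduced_version = '.'.join(APPSCALE_VERSION.split('.')[:2])
print("Starting AppScale " + reduced_version)  # -> Starting AppScale 2.3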