def set_instances(self):
    """Start/stop instances so we have the requested number running.

    Computes the desired instance set from the request, diffs it against
    the instances currently running under our prefix, then bulk-inserts
    the missing ones and bulk-deletes the extraneous ones.
    """
    gce_project = self._create_gce()
    self._setup_firewall(gce_project)
    image = self._get_image(gce_project)
    disks = self._get_disks(gce_project)

    # Build the desired set of instances, keyed by name.
    requested = int(self.request.get('num_instances'))
    desired = self._get_instance_list(gce_project, requested, image, disks)
    desired_by_name = {inst.name: inst for inst in desired}
    desired_names = set(desired_by_name)

    # Fetch the instances currently running under our prefix.
    running = gce_appengine.GceAppEngine().run_gce_request(
        self,
        gce_project.list_instances,
        'Error listing instances: ',
        filter='name eq ^%s-.*' % self.instance_prefix())
    running_by_name = {inst.name: inst for inst in running}
    running_names = set(running_by_name)

    # Insert instances that are desired but not yet running.
    names_to_add = desired_names - running_names
    additions = [desired_by_name[name] for name in names_to_add]
    if additions:
        gce_appengine.GceAppEngine().run_gce_request(
            self,
            gce_project.bulk_insert,
            'Error inserting instances: ',
            resources=additions)

    # Delete instances that are running but no longer desired.
    names_to_remove = running_names - desired_names
    removals = [running_by_name[name] for name in names_to_remove]
    if removals:
        gce_appengine.GceAppEngine().run_gce_request(
            self,
            gce_project.bulk_delete,
            'Error deleting instances: ',
            resources=removals)

    logging.info("current_set: %s", running_names)
    logging.info("target_set: %s", desired_names)
    logging.info("to_add_set: %s", names_to_add)
    logging.info("to_remove_set: %s", names_to_remove)
def post(self):
    """Start instances using the gce_appengine helper class."""
    project_id = data_handler.stored_user_data[user_data.GCE_PROJECT_ID]
    zone = data_handler.stored_user_data[user_data.GCE_ZONE_NAME]
    current_user_id = users.get_current_user().user_id()
    credentials = oauth2client.StorageByKeyName(
        oauth2client.CredentialsModel, current_user_id, 'credentials').get()
    project = gce.GceProject(
        credentials, project_id=project_id, zone_name=zone)

    # Build one instance resource per requested slot, named DEMO_NAME-<i>.
    count = int(self.request.get('num_instances'))
    instances = []
    for idx in range(count):
        instances.append(gce.Instance('%s-%d' % (DEMO_NAME, idx)))

    response = gce_appengine.GceAppEngine().run_gce_request(
        self,
        project.bulk_insert,
        'Error inserting instances: ',
        resources=instances)

    # Record objective in datastore so we can recover work in progress.
    updateObjective(project_id, count)

    if response:
        self.response.headers['Content-Type'] = 'text/plain'
        self.response.out.write('starting cluster')
def get(self):
    """Get and return the list of instances with names containing the tag."""
    project_id = data_handler.stored_user_data[user_data.GCE_PROJECT_ID]
    project = gce.GceProject(
        oauth_decorator.credentials, project_id=project_id)
    helper = gce_appengine.GceAppEngine()
    helper.list_demo_instances(self, project, DEMO_NAME)
def post(self):
    """Stop instances using the gce_appengine helper class."""
    user_info = getUserDemoInfo(users.get_current_user())
    project_id = data_handler.stored_user_data[user_data.GCE_PROJECT_ID]
    zone = data_handler.stored_user_data[user_data.GCE_ZONE_NAME]
    uid = users.get_current_user().user_id()
    credentials = oauth2client.StorageByKeyName(
        oauth2client.CredentialsModel, uid, 'credentials').get()
    project = gce.GceProject(
        credentials, project_id=project_id, zone_name=zone)

    # Tear down this user's demo instances.
    gce_appengine.GceAppEngine().delete_demo_instances(
        self, project, user_info['demo_id'])

    # Record reset objective in datastore so we can recover work in progress.
    updateObjective(user_info['project_id'], 0)

    # Also remove the per-user proxy route ("<demo_id>-0").
    gce_appengine.GceAppEngine().delete_demo_route(
        self, project, '%s-0' % user_info['demo_id'])
def post(self):
    """Stop instances with names containing the tag."""
    current_user = users.get_current_user()
    credentials = oauth2client.StorageByKeyName(
        oauth2client.CredentialsModel, current_user.user_id(),
        'credentials').get()
    project_id = data_handler.stored_user_data[user_data.GCE_PROJECT_ID]
    project = gce.GceProject(credentials, project_id=project_id)
    helper = gce_appengine.GceAppEngine()
    helper.delete_demo_instances(self, project, DEMO_NAME)
def get(self):
    """List instances using the gce_appengine helper class.

    Return the results as JSON mapping instance name to status.
    """
    project_id = data_handler.stored_user_data[user_data.GCE_PROJECT_ID]
    zone = data_handler.stored_user_data[user_data.GCE_ZONE_NAME]
    project = gce.GceProject(
        oauth_decorator.credentials, project_id=project_id, zone_name=zone)
    helper = gce_appengine.GceAppEngine()
    helper.list_demo_instances(self, project, DEMO_NAME)
def post(self):
    """Insert instances with a startup script, metadata, and scopes.

    Startup script is randomly chosen to either rotate images left or
    right. Metadata includes the image to rotate, the demo name tag, and
    the machine number. Service account scopes include Compute and
    storage.
    """
    user = users.get_current_user()
    credentials = oauth2client.StorageByKeyName(
        oauth2client.CredentialsModel, user.user_id(), 'credentials').get()
    gce_project_id = data_handler.stored_user_data[user_data.GCE_PROJECT_ID]
    gce_project = gce.GceProject(credentials, project_id=gce_project_id)

    # Get the bucket info for the instance metadata.
    gcs_bucket = data_handler.stored_user_data[user_data.GCS_BUCKET]
    gcs_directory = data_handler.stored_user_data.get(
        user_data.GCS_DIRECTORY, None)
    if gcs_directory:
        gcs_path = '%s/%s' % (gcs_bucket, gcs_directory)
    else:
        gcs_path = gcs_bucket

    # Figure out the image. Use custom image if it exists.
    (image_project, image_name) = self._get_image_name(gce_project)

    # Read the startup script ONCE, outside the loop. The previous code
    # re-opened and re-read the file on every iteration and never closed
    # the handle (resource leak); the contents are loop-invariant.
    startup_script = os.path.join(os.path.dirname(__file__), 'startup.sh')
    with open(startup_script, 'r') as script_file:
        startup_script_contents = script_file.read()

    # Create a list of instances to insert.
    instances = []
    num_instances = int(self.request.get('num_instances'))
    for i in range(num_instances):
        instances.append(
            gce.Instance(
                name='%s-%d' % (DEMO_NAME, i),
                image_project_id=image_project,
                image_name=image_name,
                service_accounts=gce_project.settings[
                    'cloud_service_account'],
                metadata=[{
                    'key': 'startup-script',
                    'value': startup_script_contents
                }, {
                    # Image each worker rotates; chosen at random per VM.
                    'key': 'image',
                    'value': random.choice(IMAGES)
                }, {
                    'key': 'seq',
                    'value': random.choice(SEQUENCES)
                }, {
                    'key': 'machine-num',
                    'value': i
                }, {
                    'key': 'tag',
                    'value': DEMO_NAME
                }, {
                    'key': 'gcs-path',
                    'value': gcs_path
                }]))

    response = gce_appengine.GceAppEngine().run_gce_request(
        self,
        gce_project.bulk_insert,
        'Error inserting instances: ',
        resources=instances)

    if response:
        self.response.headers['Content-Type'] = 'text/plain'
        self.response.out.write('starting cluster')
def cleanup(self):
    """Stop instances using the gce_appengine helper class."""
    project = self._create_gce()
    helper = gce_appengine.GceAppEngine()
    helper.delete_demo_instances(self, project, self.instance_prefix())
def get_instances(self):
    """List instances.

    Uses app engine app identity to retrieve an access token for the app
    engine service account. No client OAuth required. External IP is used
    to determine if the instance is actually running.
    """
    gce_project = self._create_gce()
    # Only instances whose names start with our prefix belong to this demo.
    instances = gce_appengine.GceAppEngine().run_gce_request(
        self,
        gce_project.list_instances,
        'Error listing instances: ',
        filter='name eq ^%s-.*' % self.instance_prefix())

    # A map of instanceName -> (ip, RPC)
    health_rpcs = {}

    # Convert instance info to dict and check server status.
    num_running = 0
    instance_dict = {}
    if instances:
        for instance in instances:
            instance_record = {}
            instance_dict[instance.name] = instance_record
            if instance.status:
                instance_record['status'] = instance.status
            else:
                instance_record['status'] = 'OTHER'
            # Find the first NAT'd (external) IP across all interfaces.
            ip = None
            for interface in instance.network_interfaces:
                for config in interface.get('accessConfigs', []):
                    if 'natIP' in config:
                        ip = config['natIP']
                        instance_record['externalIp'] = ip
                        break
                if ip:
                    break

            # Ping the instance server. Grab stats from /debug/vars.
            # Fetches are issued asynchronously here and collected below.
            if ip and instance.status == 'RUNNING':
                num_running += 1
                # Cache-busting timestamp query param.
                health_url = 'http://%s/debug/vars?t=%d' % (ip,
                                                            int(time.time()))
                logging.debug('Health checking %s', health_url)
                rpc = urlfetch.create_rpc(deadline=HEALTH_CHECK_TIMEOUT)
                urlfetch.make_fetch_call(rpc, url=health_url)
                health_rpcs[instance.name] = rpc

    # Ping through a LBs too. Only if we get success there do we know we are
    # really serving.
    loadbalancers = []
    lb_rpcs = {}
    # NOTE(review): LBs are only consulted when more than one instance
    # exists — presumably a single instance is reached directly.
    if instances and len(instances) > 1:
        loadbalancers = self._get_lb_servers()
    if num_running > 0 and loadbalancers:
        for lb in loadbalancers:
            health_url = 'http://%s/health?t=%d' % (lb, int(time.time()))
            logging.debug('Health checking %s', health_url)
            rpc = urlfetch.create_rpc(deadline=HEALTH_CHECK_TIMEOUT)
            urlfetch.make_fetch_call(rpc, url=health_url)
            lb_rpcs[lb] = rpc

    # wait for RPCs to complete and update dict as necessary
    vars_aggregator = ServerVarsAggregator()

    # TODO: there is significant duplication here. Refactor.
    for (instance_name, rpc) in health_rpcs.items():
        result = None
        instance_record = instance_dict[instance_name]
        try:
            result = rpc.get_result()
            # "memstats" in the body is used as the healthiness marker;
            # a healthy instance is promoted to SERVING status.
            if result and "memstats" in result.content:
                logging.debug('%s healthy!', instance_name)
                instance_record['status'] = 'SERVING'
                instance_vars = {}
                try:
                    instance_vars = json.loads(result.content)
                    instance_record['vars'] = instance_vars
                    vars_aggregator.aggregate_vars(instance_vars)
                except ValueError as error:
                    logging.error('Error decoding vars json for %s: %s',
                                  instance_name, error)
            else:
                logging.debug('%s unhealthy. Content: %s', instance_name,
                              result.content)
        except urlfetch.Error as error:
            # Fetch failure leaves the instance's original status intact.
            logging.debug('%s unhealthy: %s', instance_name, str(error))

    # Check health status through the load balancer.
    # Healthy only if every LB probe succeeded; no LBs means unhealthy.
    loadbalancer_healthy = bool(lb_rpcs)
    for (lb, lb_rpc) in lb_rpcs.items():
        result = None
        try:
            result = lb_rpc.get_result()
            if result and "ok" in result.content:
                logging.info('LB %s healthy: %s\n%s', lb, result.headers,
                             result.content)
            else:
                logging.info('LB %s result not okay: %s, %s', lb,
                             result.status_code, result.content)
                loadbalancer_healthy = False
                break
        except urlfetch.Error as error:
            logging.info('LB %s fetch error: %s', lb, str(error))
            loadbalancer_healthy = False
            break

    # Reply with the aggregate picture as JSON.
    response_dict = {
        'instances': instance_dict,
        'vars': vars_aggregator.get_aggregate(),
        'loadbalancers': loadbalancers,
        'loadbalancer_healthy': loadbalancer_healthy,
    }
    self.response.headers['Content-Type'] = 'application/json'
    self.response.out.write(json.dumps(response_dict))
def post(self):
    """Start instances using the gce_appengine helper class."""
    user_info = getUserDemoInfo(users.get_current_user())
    project_id = data_handler.stored_user_data[user_data.GCE_PROJECT_ID]
    zone = data_handler.stored_user_data[user_data.GCE_ZONE_NAME]
    uid = users.get_current_user().user_id()
    credentials = oauth2client.StorageByKeyName(
        oauth2client.CredentialsModel, uid, 'credentials').get()
    project = gce.GceProject(
        credentials, project_id=project_id, zone_name=zone)

    demo_id = user_info['demo_id']
    proxy_name = '%s-0' % demo_id

    # Create a user specific route. We will apply this route to all
    # instances without an IP address so their requests are routed
    # through the first instance acting as a proxy.
    proxy_instance = gce.Instance(name=proxy_name, zone_name=zone)
    proxy_instance.gce_project = project
    user_route = gce.Route(name=proxy_name,
                           network_name='default',
                           destination_range='0.0.0.0/0',
                           next_hop_instance=proxy_instance,
                           priority=200,
                           tags=['qs-%s' % user_info['ldap']])
    response = gce_appengine.GceAppEngine().run_gce_request(
        self,
        project.insert,
        'Error inserting route: ',
        resource=user_route)

    # Define a network interfaces list here that requests an ephemeral
    # external IP address. We will apply this configuration to the first
    # VM started by quick start. All other VMs will take the default
    # network configuration, which requests no external IP address.
    default_network = gce.Network('default')
    default_network.gce_project = project
    ext_net = [{
        'network': default_network.url,
        'accessConfigs': [{
            'name': 'External IP access config',
            'type': 'ONE_TO_ONE_NAT'
        }]
    }]

    # Instance 0 is the proxy: external IP, IP forwarding, proxy tag.
    # All others are routed through it and are told so via the startup
    # script's boolean argument.
    num_instances = int(self.request.get('num_instances'))
    instances = []
    for i in range(num_instances):
        is_proxy = (i == 0)
        name = '%s-%d' % (demo_id, i)
        startup = user_data.STARTUP_SCRIPT % ('false' if is_proxy else 'true')
        instances.append(
            gce.Instance(
                name,
                zone_name=zone,
                network_interfaces=(ext_net if is_proxy else None),
                metadata=[{
                    'key': 'startup-script',
                    'value': startup
                }],
                service_accounts=[{
                    'email': 'default',
                    'scopes': ['https://www.googleapis.com/auth/compute']
                }],
                disk_mounts=[
                    gce.DiskMount(init_disk_name=name, boot=True)
                ],
                can_ip_forward=is_proxy,
                tags=(['qs-proxy'] if is_proxy
                      else ['qs-%s' % user_info['ldap']])))

    response = gce_appengine.GceAppEngine().run_gce_request(
        self,
        project.bulk_insert,
        'Error inserting instances: ',
        resources=instances)

    # Record objective in datastore so we can recover work in progress.
    updateObjective(user_info['project_id'], num_instances)

    if response:
        self.response.headers['Content-Type'] = 'text/plain'
        self.response.out.write('starting cluster')