def update_port_postcommit(self, context):
    vnic_type = context.current.get(portbindings.VNIC_TYPE)
    if vnic_type and vnic_type in self.unsupported_vnic_types:
        LOG.debug("Ignoring unsupported vnic_type %s", vnic_type)
        return

    # update port on the network controller
    port = self._prepare_port_for_controller(context)
    if port:
        # For vhostuser type ports, the membership rule and endpoint were
        # created during bind_port, so skip this
        if port[portbindings.VIF_TYPE] == portbindings.VIF_TYPE_VHOST_USER:
            return

        try:
            self.async_port_create(port["network"]["tenant_id"],
                                   port["network"]["id"], port)
        except servermanager.RemoteRestError as e:
            with excutils.save_and_reraise_exception() as ctxt:
                if (cfg.CONF.RESTPROXY.auto_sync_on_failure and
                        e.status == httplib.NOT_FOUND and
                        servermanager.NXNETWORK in e.reason):
                    ctxt.reraise = False
                    LOG.error(_LE("Inconsistency with backend controller "
                                  "triggering full synchronization."))
                    self._send_all_data_auto(
                        triggered_by_tenant=port["network"]["tenant_id"])

def _report_state(self):
    try:
        self.state_rpc.report_state(self.context, self.agent_state,
                                    self.use_call)
        self.use_call = False
        self.agent_state.pop('start_flag', None)
    except Exception:
        LOG.exception(_LE("Failed reporting state!"))

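# A minimal sketch of how _report_state() is typically wired up in Neutron
# agents (an assumption, not taken from this module): a fixed-interval
# heartbeat green thread from oslo.service drives the periodic RPC report.
from oslo_service import loopingcall


def _setup_heartbeat(agent, report_interval):
    # call agent._report_state() every report_interval seconds
    heartbeat = loopingcall.FixedIntervalLoopingCall(agent._report_state)
    heartbeat.start(interval=report_interval)
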
def async_port_create(self, tenant_id, net_id, port):
    try:
        tenant_id = tenant_id or servermanager.SERVICE_TENANT
        rest_port = copy.deepcopy(port)
        self._add_service_tenant_to_port(rest_port)
        self.servers.rest_create_port(tenant_id, net_id, rest_port)
    except servermanager.RemoteRestError as e:
        # 404 should never be received on a port create unless
        # there are inconsistencies between the data in neutron
        # and the data in the backend.
        # Run a sync to get it consistent.
        if (cfg.CONF.RESTPROXY.auto_sync_on_failure and
                e.status == httplib.NOT_FOUND and
                servermanager.NXNETWORK in e.reason):
            LOG.error(_LE("Inconsistency with backend controller "
                          "triggering full synchronization."))
            # args depend on whether we are operating as the ML2 driver
            # or as the full plugin
            self._send_all_data_auto(triggered_by_tenant=tenant_id)
            # If the full sync worked, the port will be created
            # on the controller so it can be safely marked as active
        else:
            # Any errors that don't result in a successful auto-sync
            # require that the port be placed into the error state.
            LOG.error(_LE("NeutronRestProxyV2: Unable to create port: %s"),
                      e)
            try:
                self._set_port_status(port['id'], const.PORT_STATUS_ERROR)
            except exceptions.PortNotFound:
                # If the port is already gone from the DB and there was an
                # error creating it on the backend, everything is already
                # consistent.
                pass
            return

    new_status = (const.PORT_STATUS_ACTIVE if port['state'] == 'UP'
                  else const.PORT_STATUS_DOWN)
    try:
        self._set_port_status(port['id'], new_status)
    except exceptions.PortNotFound:
        # This port was deleted before the create made it to the
        # controller, so it now needs to be deleted since the normal delete
        # request would have deleted a non-existent port.
        tenant_id = tenant_id or servermanager.SERVICE_TENANT
        self.servers.rest_delete_port(tenant_id, net_id, port['id'])

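# A hedged sketch of the _set_port_status() helper used above, assuming a
# plain Neutron DB session and the models_v2.Port model; the helper in the
# real plugin may differ in detail.
from neutron.db import api as db_api
from neutron.db import models_v2


def _set_port_status(self, port_id, status):
    session = db_api.get_session()
    with session.begin(subtransactions=True):
        port = session.query(models_v2.Port).filter_by(id=port_id).first()
        if not port:
            # mirror the PortNotFound contract relied on by the callers
            raise exceptions.PortNotFound(port_id=port_id)
        port.status = status
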
def get_capabilities(self):
    try:
        body = self.rest_call('GET', CAPABILITIES_PATH)[2]
        if body:
            self.capabilities = jsonutils.loads(body)
    except Exception:
        LOG.exception(_LE("Couldn't retrieve capabilities. "
                          "Newer API calls won't be supported."))
    LOG.info(_LI("The following capabilities were received "
                 "for %(server)s: %(cap)s"),
             {'server': self.server, 'cap': self.capabilities})
    return self.capabilities

def _send_floatingip_update(self, context):
    try:
        ext_net_id = self.get_external_network_id(context)
        if ext_net_id:
            # Use the elevated state of the context for the ext_net query
            admin_context = context.elevated()
            ext_net = super(L3RestProxy,
                            self).get_network(admin_context, ext_net_id)
            # update external network on network controller
            self._send_update_network(ext_net, admin_context)
    except exceptions.TooManyExternalNetworks:
        # get_external_network_id can raise when multiple external
        # networks are detected, which isn't supported by the plugin
        LOG.error(_LE("NeutronRestProxyV2: too many external networks"))

def set_port_mtu(self, port_name):
    # If this IVS port is attached to a VM, set the MTU of all
    # corresponding interfaces (veth pairs, tap and bridge interfaces)
    if IVS_VM_PORT_PREFIX in port_name:
        for iface in IVS_VM_PORT_IFACE_PREFIXES:
            iface_name = port_name.replace(IVS_VM_PORT_PREFIX, iface)
            cmd = ['ip', 'link', 'set', iface_name, 'mtu', IVS_PORT_MTU]
            try:
                utils.execute(cmd, run_as_root=True, return_stderr=False,
                              log_fail_as_error=False)
                LOG.debug("MTU of port %s set to %d", str(iface_name),
                          IVS_PORT_MTU)
            except Exception as e:
                LOG.error(_LE("Set MTU for port %(p)s failed. Unable to "
                              "execute %(cmd)s. Exception: %(exception)s"),
                          {'p': iface_name, 'cmd': cmd, 'exception': e})

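# Illustration of the prefix fan-out above, with assumed values for the
# IVS_* constants (check the agent module for the authoritative
# definitions):
IVS_VM_PORT_PREFIX = 'qvo'
IVS_VM_PORT_IFACE_PREFIXES = ['qvo', 'qvb', 'tap', 'qbr']
IVS_PORT_MTU = 9000
# A VM port named 'qvo1a2b3c' then maps to qvo1a2b3c, qvb1a2b3c, tap1a2b3c
# and qbr1a2b3c, so every interface along the veth/tap/bridge chain gets
# the same jumbo MTU.
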
def run_vsctl(self, args, check_error=False, log_fail_as_error=True):
    full_args = ["ivs-ctl"] + args
    try:
        resp = utils.execute(full_args, run_as_root=True,
                             return_stderr=True,
                             log_fail_as_error=log_fail_as_error)
        return resp[0] or resp[1]
    except Exception as e:
        with excutils.save_and_reraise_exception() as ctxt:
            if log_fail_as_error:
                logfunc = LOG.error
            else:
                logfunc = LOG.debug
            logfunc(_LE("Unable to execute %(cmd)s. "
                        "Exception: %(exception)s"),
                    {'cmd': full_args, 'exception': e})
            if not check_error:
                ctxt.reraise = False

def update_port_postcommit(self, context):
    # update port on the network controller
    port = self._prepare_port_for_controller(context)
    if port:
        try:
            self.async_port_create(port["network"]["tenant_id"],
                                   port["network"]["id"], port)
        except servermanager.RemoteRestError as e:
            with excutils.save_and_reraise_exception() as ctxt:
                if (cfg.CONF.RESTPROXY.auto_sync_on_failure and
                        e.status == httplib.NOT_FOUND and
                        servermanager.NXNETWORK in e.reason):
                    ctxt.reraise = False
                    LOG.error(_LE("Inconsistency with backend controller "
                                  "triggering full synchronization."))
                    self._send_all_data_auto(
                        triggered_by_tenant=port["network"]["tenant_id"])

def _update_tenant_cache(self, reconcile=True):
    try:
        auth = v3.Password(auth_url=self.auth_url,
                           username=self.auth_user,
                           password=self.auth_password,
                           project_name=self.auth_tenant,
                           user_domain_id=self.user_domain_id,
                           project_domain_id=self.project_domain_id)
        sess = session.Session(auth=auth)
        keystone_client = ksclient.Client(session=sess)
        tenants = keystone_client.projects.list()
        new_cached_tenants = {tn.id: tn.name for tn in tenants}
        # Add SERVICE_TENANT to handle hidden network for VRRP
        new_cached_tenants[SERVICE_TENANT] = SERVICE_TENANT

        LOG.debug("New TENANTS: %s\nPrevious Tenants: %s",
                  new_cached_tenants, self.keystone_tenants)
        diff = DictDiffer(new_cached_tenants, self.keystone_tenants)
        self.keystone_tenants = new_cached_tenants

        if reconcile:
            for tenant_id in diff.added():
                LOG.debug("TENANT create: id %s name %s", tenant_id,
                          self.keystone_tenants[tenant_id])
                self._rest_create_tenant(tenant_id)
            for tenant_id in diff.removed():
                LOG.debug("TENANT delete: id %s", tenant_id)
                self.rest_delete_tenant(tenant_id)
            if diff.changed():
                hash_handler = cdb.HashHandler()
                res = hash_handler._get_current_record()
                if res:
                    lock_owner = hash_handler._get_lock_owner(res.hash)
                    if lock_owner and "TOPO" in lock_owner:
                        # topology sync is still going on
                        return True
                LOG.debug("TENANT changed: force topo sync")
                hash_handler.put_hash('initial:hash,code')
        return True
    except Exception:
        LOG.exception(_LE("Encountered an error syncing with keystone."))
        return False

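# A minimal sketch of the DictDiffer helper assumed above (the classic
# set-based recipe); added()/removed()/changed() drive the reconcile loop.
class DictDiffer(object):
    """Compare two dicts: keys added, keys removed, values changed."""

    def __init__(self, current_dict, past_dict):
        self.current_dict = current_dict
        self.past_dict = past_dict
        self.current_keys = set(current_dict.keys())
        self.past_keys = set(past_dict.keys())
        self.intersect = self.current_keys & self.past_keys

    def added(self):
        # keys present now that were absent before
        return self.current_keys - self.intersect

    def removed(self):
        # keys present before that are absent now
        return self.past_keys - self.intersect

    def changed(self):
        # shared keys whose values differ
        return set(key for key in self.intersect
                   if self.past_dict[key] != self.current_dict[key])
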
def _consistency_watchdog(self, polling_interval=60):
    if 'consistency' not in self.get_capabilities():
        LOG.warning(_LW("Backend server(s) do not support automated "
                        "consistency checks."))
        return
    if not polling_interval:
        LOG.warning(_LW("Consistency watchdog disabled by polling "
                        "interval setting of %s."), polling_interval)
        return
    while True:
        # If consistency is supported, all we have to do is make any
        # rest call and the consistency header will be added. If it
        # doesn't match, the backend will return a synchronization error
        # that will be handled by the rest_action.
        eventlet.sleep(polling_interval)
        try:
            self.rest_action('GET', HEALTH_PATH)
        except Exception:
            LOG.exception(_LE("Encountered an error checking controller "
                              "health."))

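# Hedged usage sketch: the watchdog above is meant to run as an eventlet
# green thread beside the main plugin. 'server_pool' and the
# 'consistency_interval' RESTPROXY option name are assumptions for
# illustration.
watchdog = eventlet.spawn(server_pool._consistency_watchdog,
                          cfg.CONF.RESTPROXY.consistency_interval)
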
def create_floatingip(self, context, floatingip):
    with context.session.begin(subtransactions=True):
        # create floatingip in the DB
        new_fl_ip = super(L3RestProxy,
                          self).create_floatingip(context, floatingip)

        # create floatingip on the network controller
        try:
            if 'floatingip' in self.servers.get_capabilities():
                self.servers.rest_create_floatingip(
                    new_fl_ip['tenant_id'], new_fl_ip)
            else:
                LOG.error(BCF_CAPABILITY_L3_PLUGIN_MISS_MATCH)
                self._send_floatingip_update(context)
        except servermanager.RemoteRestError as e:
            with excutils.save_and_reraise_exception():
                LOG.error(_LE("NeutronRestProxyV2: Unable to create remote "
                              "floating IP: %s"), e)
        # return created floating IP
        return new_fl_ip

def _update_tenant_cache(self, reconcile=True):
    try:
        keystone_client = ksclient.Client(auth_url=self.auth_url,
                                          username=self.auth_user,
                                          password=self.auth_password,
                                          tenant_name=self.auth_tenant)
        tenants = keystone_client.tenants.list()
        new_cached_tenants = {tn.id: tn.name for tn in tenants}
        # Add SERVICE_TENANT to handle hidden network for VRRP
        new_cached_tenants[SERVICE_TENANT] = SERVICE_TENANT

        LOG.debug("New TENANTS: %s\nPrevious Tenants: %s",
                  new_cached_tenants, self.keystone_tenants)
        diff = DictDiffer(new_cached_tenants, self.keystone_tenants)
        self.keystone_tenants = new_cached_tenants

        if reconcile:
            for tenant_id in diff.added():
                LOG.debug("TENANT create: id %s name %s", tenant_id,
                          self.keystone_tenants[tenant_id])
                self._rest_create_tenant(tenant_id)
            for tenant_id in diff.removed():
                LOG.debug("TENANT delete: id %s", tenant_id)
                self.rest_delete_tenant(tenant_id)
            if diff.changed():
                hash_handler = cdb.HashHandler()
                res = hash_handler._get_current_record()
                if res:
                    lock_owner = hash_handler._get_lock_owner(res.hash)
                    if lock_owner and "TOPO" in lock_owner:
                        # topology sync is still going on
                        return True
                LOG.debug("TENANT changed: force topo sync")
                hash_handler.put_hash('initial:hash,code')
        return True
    except Exception:
        LOG.exception(_LE("Encountered an error syncing with keystone."))
        return False

def daemon_loop(self):
    ports = set()

    while True:
        start = time.time()
        try:
            port_info = self._update_ports(ports)
            if port_info:
                LOG.debug("Agent loop has new device")
                self._update_port_mtus(port_info)
                self._process_devices_filter(port_info)
                ports = port_info['current']
        except Exception:
            LOG.exception(_LE("Error in agent event loop"))

        elapsed = max(time.time() - start, 0)
        if elapsed < self.polling_interval:
            time.sleep(self.polling_interval - elapsed)
        else:
            LOG.debug("Loop iteration exceeded interval "
                      "(%(polling_interval)s vs. %(elapsed)s)!",
                      {'polling_interval': self.polling_interval,
                       'elapsed': elapsed})

def rest_call(self, action, resource, data, headers, ignore_codes,
              timeout=False):
    context = self.get_context_ref()
    if context:
        # include the requesting context information if available
        cdict = context.to_dict()
        # remove the auth token so it's not present in debug logs on the
        # backend controller
        cdict.pop('auth_token', None)
        headers[REQ_CONTEXT_HEADER] = jsonutils.dumps(cdict)
    hash_handler = cdb.HashHandler()
    good_first = sorted(self.servers, key=lambda x: x.failed)
    first_response = None
    for active_server in good_first:
        LOG.debug("ServerProxy: %(action)s to servers: "
                  "%(server)r, %(resource)s",
                  {'action': action,
                   'server': (active_server.server, active_server.port),
                   'resource': resource})
        for x in range(HTTP_SERVICE_UNAVAILABLE_RETRY_COUNT + 1):
            ret = active_server.rest_call(action, resource, data, headers,
                                          timeout,
                                          reconnect=self.always_reconnect,
                                          hash_handler=hash_handler)
            if ret[0] != httplib.SERVICE_UNAVAILABLE:
                break
            time.sleep(HTTP_SERVICE_UNAVAILABLE_RETRY_INTERVAL)

        # If inconsistent, do a full synchronization
        if ret[0] == httplib.CONFLICT:
            if not self.get_topo_function:
                raise cfg.Error(_('Server requires synchronization, '
                                  'but no topology function was defined.'))

            LOG.info(_LI("ServerProxy: HashConflict detected with request "
                         "%(action)s %(resource)s Starting Topology sync"),
                     {'action': action, 'resource': resource})
            self._topo_sync_in_progress = True
            eventlet.spawn_n(self.keep_updating_lock)
            try:
                data = self.get_topo_function(**self.get_topo_function_args)
                if data:
                    data = self._sanitize_data_for_topo_sync(data)
                    ret_ts = active_server.rest_call('POST', TOPOLOGY_PATH,
                                                     data, timeout=None)
                    if self.server_failure(ret_ts, ignore_codes):
                        LOG.error(_LE("ServerProxy: Topology sync failed"))
                        raise RemoteRestError(reason=ret_ts[2],
                                              status=ret_ts[0])
            finally:
                LOG.info(_LI("ServerProxy: Topology sync completed"))
                self._topo_sync_in_progress = False
            if data is None:
                return None

        # Store the first response as the error to be bubbled up to the
        # user since it was a good server. Subsequent servers will most
        # likely be cluster slaves and won't have a useful error for the
        # user (e.g. 302 redirect to master)
        if not first_response:
            first_response = ret
        if not self.server_failure(ret, ignore_codes):
            active_server.failed = False
            LOG.debug("ServerProxy: %(action)s succeed for servers: "
                      "%(server)r Response: %(response)s",
                      {'action': action,
                       'server': (active_server.server, active_server.port),
                       'response': ret[3]})
            return ret
        else:
            LOG.warning(_LW('ServerProxy: %(action)s failure for servers:'
                            '%(server)r Response: %(response)s'),
                        {'action': action,
                         'server': (active_server.server,
                                    active_server.port),
                         'response': ret[3]})
            LOG.warning(_LW("ServerProxy: Error details: "
                            "status=%(status)d, reason=%(reason)r, "
                            "ret=%(ret)s, data=%(data)r"),
                        {'status': ret[0], 'reason': ret[1],
                         'ret': ret[2], 'data': ret[3]})
            active_server.failed = True

    # A failure on a delete means the object is gone from Neutron but not
    # from the controller. Set the consistency hash to a bad value to
    # trigger a sync on the next check.
    # NOTE: The hash must have a comma in it otherwise it will be ignored
    # by the backend.
    if action == 'DELETE':
        hash_handler.put_hash('INCONSISTENT,INCONSISTENT')
    # All servers failed, reset server list and try again next time
    LOG.error(_LE('ServerProxy: %(action)s failure for all servers: '
                  '%(server)r'),
              {'action': action,
               'server': tuple((s.server, s.port) for s in self.servers)})
    return first_response

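# A hedged sketch of the server_failure() predicate used above, as one
# plausible formulation: a response fails when its status is neither a
# success code nor explicitly ignored. SUCCESS_CODES is assumed to hold the
# 2xx statuses accepted by the pool; the real module may phrase this in
# terms of failure codes instead.
def server_failure(self, resp, ignore_codes=None):
    return (resp[0] not in SUCCESS_CODES and
            resp[0] not in (ignore_codes or []))
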
def rest_call(self, action, resource, data='', headers=None,
              timeout=False, reconnect=False, hash_handler=None):
    uri = self.base_uri + resource
    body = jsonutils.dumps(data)
    headers = headers or {}
    headers['Content-type'] = 'application/json'
    headers['Accept'] = 'application/json'
    headers['NeutronProxy-Agent'] = self.name
    headers['Instance-ID'] = self.neutron_id
    headers['Orchestration-Service-ID'] = ORCHESTRATION_SERVICE_ID
    if hash_handler:
        # this will be excluded on calls that don't need hashes
        # (e.g. topology sync, capability checks)
        headers[HASH_MATCH_HEADER] = hash_handler.read_for_update()
    else:
        hash_handler = cdb.HashHandler()
    # TODO(kevinbenton): Re-enable keep-alive in a thread-safe fashion.
    # When multiple workers are enabled the saved connection gets mangled
    # by multiple threads so we always reconnect.
    if 'keep-alive' in self.capabilities and False:
        headers['Connection'] = 'keep-alive'
    else:
        reconnect = True
    if self.auth:
        headers['Authorization'] = self.auth
    LOG.debug("ServerProxy: server=%(server)s, port=%(port)d, "
              "ssl=%(ssl)r",
              {'server': self.server, 'port': self.port, 'ssl': self.ssl})
    LOG.debug("ServerProxy: resource=%(resource)s, data=%(data)r, "
              "headers=%(headers)r, action=%(action)s",
              {'resource': resource, 'data': data, 'headers': headers,
               'action': action})

    # unspecified timeout is False because a timeout can be specified as
    # None to indicate no timeout.
    if timeout is False:
        timeout = self.timeout

    if timeout != self.timeout:
        # need a new connection if timeout has changed
        reconnect = True

    if not self.currentconn or reconnect:
        if self.currentconn:
            self.currentconn.close()
        if self.ssl:
            currentconn = HTTPSConnectionWithValidation(
                self.server, self.port, timeout=timeout)
            if currentconn is None:
                LOG.error(_LE('ServerProxy: Could not establish HTTPS '
                              'connection'))
                return 0, None, None, None
            currentconn.combined_cert = self.combined_cert
        else:
            currentconn = httplib.HTTPConnection(
                self.server, self.port, timeout=timeout)
            if currentconn is None:
                LOG.error(_LE('ServerProxy: Could not establish HTTP '
                              'connection'))
                return 0, None, None, None

    try:
        currentconn.request(action, uri, body, headers)
        response = currentconn.getresponse()
        respstr = response.read()
        respdata = respstr
        if response.status in self.success_codes:
            hash_value = response.getheader(HASH_MATCH_HEADER)
            # don't clear hash from DB if a hash header wasn't present
            if hash_value is not None:
                hash_handler.put_hash(hash_value)
            else:
                hash_handler.clear_lock()
            try:
                respdata = jsonutils.loads(respstr)
            except ValueError:
                # response was not JSON, ignore the exception
                pass
        else:
            # release lock so others don't have to wait for timeout
            hash_handler.clear_lock()
        ret = (response.status, response.reason, respstr, respdata)
    except httplib.HTTPException:
        # If we were using a cached connection, try again with a new one.
        with excutils.save_and_reraise_exception() as ctxt:
            currentconn.close()
            if reconnect:
                # if reconnect is true, this was on a fresh connection so
                # reraise since this server seems to be broken
                ctxt.reraise = True
            else:
                # if reconnect is false, it was a cached connection so
                # try one more time before re-raising
                ctxt.reraise = False
                return self.rest_call(action, resource, data, headers,
                                      timeout=timeout, reconnect=True)
    except (socket.timeout, socket.error) as e:
        currentconn.close()
        LOG.error(_LE('ServerProxy: %(action)s failure, %(e)r'),
                  {'action': action, 'e': e})
        ret = 0, None, None, None
    LOG.debug("ServerProxy: status=%(status)d, reason=%(reason)r, "
              "ret=%(ret)s, data=%(data)r",
              {'status': ret[0], 'reason': ret[1],
               'ret': ret[2], 'data': ret[3]})
    return ret

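# A hedged sketch of the HTTPSConnectionWithValidation class referenced
# above: an HTTPSConnection that, when a combined certificate bundle has
# been attached, requires the server certificate to validate against it.
# The exact wrapping logic in the real servermanager module may differ.
import socket
import ssl


class HTTPSConnectionWithValidation(httplib.HTTPSConnection):
    # set by the caller after construction (see rest_call above)
    combined_cert = None

    def connect(self):
        sock = socket.create_connection((self.host, self.port),
                                        self.timeout)
        if self.combined_cert:
            # validate the controller cert against the combined bundle
            self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file,
                                        cert_reqs=ssl.CERT_REQUIRED,
                                        ca_certs=self.combined_cert)
        else:
            # no bundle configured: encrypt without validation
            self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file,
                                        cert_reqs=ssl.CERT_NONE)
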
def _bind_port_nfvswitch(self, context):
    """Perform bind_port for nfvswitch.

    An NFV VM needs to be attached to a nfv-switch socket. So, during
    bind_port() we create an NFV VM endpoint on BCF, thereby reserving
    the socket for its use. Then pass the sock_path in the set_binding()
    for Nova to plug the VM into the nfv-switch.

    @param context: PortContext object
    """
    vif_type = portbindings.VIF_TYPE_VHOST_USER
    port = self._prepare_port_for_controller(context)
    if not port:
        LOG.warning(_LW("nfv-switch bind_port() skipped due to missing "
                        "Host ID."))
        return

    # Create an endpoint corresponding to the port on the Controller,
    # thereby asking the Controller to reserve a vhost_sock for it
    tenant_id = port["network"]["tenant_id"]
    network_id = port["network"]["id"]
    # Set vif_type to 'vhost_user' for the Controller to reserve vhost_sock
    port[portbindings.VIF_TYPE] = vif_type
    try:
        self.async_port_create(tenant_id, network_id, port)
    except servermanager.RemoteRestError as e:
        with excutils.save_and_reraise_exception() as ctxt:
            if (cfg.CONF.RESTPROXY.auto_sync_on_failure and
                    e.status == httplib.NOT_FOUND and
                    servermanager.NXNETWORK in e.reason):
                ctxt.reraise = False
                LOG.error(_LE("Inconsistency with backend controller "
                              "triggering full synchronization."))
                self._send_all_data_auto(triggered_by_tenant=tenant_id)
    LOG.debug('Successfully created endpoint for nfv-switch VM %s',
              port['id'])

    # Retrieve the vhost_socket reserved for the port (endpoint) by the
    # Controller and use it in set_binding()
    resp = self.servers.rest_get_port(tenant_id, network_id, port["id"])
    if not resp or not isinstance(resp, list):
        LOG.warning(_LW("Controller failed to reserve a nfv-switch sock"))
        return
    vhost_sock = None
    attachment_point = resp[0].get('attachment-point')
    if attachment_point:
        vhost_sock = attachment_point.get('interface')
    if not vhost_sock:
        LOG.warning(_LW("Controller failed to reserve a nfv-switch sock"))
        return

    vhost_sock_path = self._get_vhost_user_sock_path(vhost_sock)
    LOG.debug('nfv-switch VM allotted sock_path %s', vhost_sock_path)

    vif_details = {
        portbindings.CAP_PORT_FILTER: False,
        portbindings.VHOST_USER_MODE: portbindings.VHOST_USER_MODE_SERVER,
        portbindings.VHOST_USER_OVS_PLUG: False,
        portbindings.VHOST_USER_SOCKET: vhost_sock_path
    }
    for segment in context.segments_to_bind:
        if segment[api.NETWORK_TYPE] == pconst.TYPE_VLAN:
            context.set_binding(segment[api.ID], vif_type, vif_details)

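# A hedged sketch of _get_vhost_user_sock_path(): the socket name returned
# by the controller is joined to a base directory. Both the helper body and
# the base path here are assumptions for illustration only.
import os


def _get_vhost_user_sock_path(self, vhost_sock):
    # base directory is an assumed default, not taken from the source
    return os.path.join('/run/vhost', vhost_sock)
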
def rest_call(self, action, resource, data, headers, ignore_codes,
              timeout=False):
    context = self.get_context_ref()
    if context:
        # include the requesting context information if available
        cdict = context.to_dict()
        # remove the auth token so it's not present in debug logs on the
        # backend controller
        cdict.pop('auth_token', None)
        headers[REQ_CONTEXT_HEADER] = jsonutils.dumps(cdict)
    hash_handler = cdb.HashHandler()
    good_first = sorted(self.servers, key=lambda x: x.failed)
    first_response = None
    for active_server in good_first:
        LOG.debug("ServerProxy: %(action)s to servers: "
                  "%(server)r, %(resource)s",
                  {'action': action,
                   'server': (active_server.server, active_server.port),
                   'resource': resource})
        for x in range(HTTP_SERVICE_UNAVAILABLE_RETRY_COUNT + 1):
            ret = active_server.rest_call(action, resource, data, headers,
                                          timeout,
                                          reconnect=self.always_reconnect,
                                          hash_handler=hash_handler)
            if ret[0] != httplib.SERVICE_UNAVAILABLE:
                break
            time.sleep(HTTP_SERVICE_UNAVAILABLE_RETRY_INTERVAL)

        # If inconsistent, do a full synchronization
        if ret[0] == httplib.CONFLICT:
            if not self.get_topo_function:
                raise cfg.Error(_('Server requires synchronization, '
                                  'but no topology function was defined.'))
            self._topo_sync_in_progress = True
            eventlet.spawn_n(self.keep_updating_lock)
            try:
                data = self.get_topo_function(**self.get_topo_function_args)
                if data:
                    active_server.rest_call('POST', TOPOLOGY_PATH, data,
                                            timeout=None)
            finally:
                self._topo_sync_in_progress = False
            if data is None:
                return None

        # Store the first response as the error to be bubbled up to the
        # user since it was a good server. Subsequent servers will most
        # likely be cluster slaves and won't have a useful error for the
        # user (e.g. 302 redirect to master)
        if not first_response:
            first_response = ret
        if not self.server_failure(ret, ignore_codes):
            active_server.failed = False
            LOG.debug("ServerProxy: %(action)s succeed for servers: "
                      "%(server)r Response: %(response)s",
                      {'action': action,
                       'server': (active_server.server, active_server.port),
                       'response': ret[3]})
            return ret
        else:
            LOG.warning(_LW('ServerProxy: %(action)s failure for servers:'
                            '%(server)r Response: %(response)s'),
                        {'action': action,
                         'server': (active_server.server,
                                    active_server.port),
                         'response': ret[3]})
            LOG.warning(_LW("ServerProxy: Error details: "
                            "status=%(status)d, reason=%(reason)r, "
                            "ret=%(ret)s, data=%(data)r"),
                        {'status': ret[0], 'reason': ret[1],
                         'ret': ret[2], 'data': ret[3]})
            active_server.failed = True

    # A failure on a delete means the object is gone from Neutron but not
    # from the controller. Set the consistency hash to a bad value to
    # trigger a sync on the next check.
    # NOTE: The hash must have a comma in it otherwise it will be ignored
    # by the backend.
    if action == 'DELETE':
        hash_handler.put_hash('INCONSISTENT,INCONSISTENT')
    # All servers failed, reset server list and try again next time
    LOG.error(_LE('ServerProxy: %(action)s failure for all servers: '
                  '%(server)r'),
              {'action': action,
               'server': tuple((s.server, s.port) for s in self.servers)})
    return first_response

def rest_call(self, action, resource, data='', headers=None,
              timeout=False, reconnect=False, hash_handler=None):
    uri = self.base_uri + resource
    body = jsonutils.dumps(data)
    headers = headers or {}
    headers['Content-type'] = 'application/json'
    headers['Accept'] = 'application/json'
    headers['NeutronProxy-Agent'] = self.name
    headers['Instance-ID'] = self.neutron_id
    headers['Orchestration-Service-ID'] = ORCHESTRATION_SERVICE_ID
    if hash_handler:
        # this will be excluded on calls that don't need hashes
        # (e.g. topology sync, capability checks)
        headers[HASH_MATCH_HEADER] = hash_handler.read_for_update()
    else:
        hash_handler = cdb.HashHandler()
    # TODO(kevinbenton): Re-enable keep-alive in a thread-safe fashion.
    # When multiple workers are enabled the saved connection gets mangled
    # by multiple threads so we always reconnect.
    if 'keep-alive' in self.capabilities and False:
        headers['Connection'] = 'keep-alive'
    else:
        reconnect = True
    if self.auth:
        headers['Authorization'] = self.auth
    LOG.debug("ServerProxy: server=%(server)s, port=%(port)d, "
              "ssl=%(ssl)r",
              {'server': self.server, 'port': self.port, 'ssl': self.ssl})
    LOG.debug("ServerProxy: resource=%(resource)s, data=%(data)r, "
              "headers=%(headers)r, action=%(action)s",
              {'resource': resource, 'data': data, 'headers': headers,
               'action': action})

    # unspecified timeout is False because a timeout can be specified as
    # None to indicate no timeout.
    if timeout is False:
        timeout = self.timeout

    if timeout != self.timeout:
        # need a new connection if timeout has changed
        reconnect = True

    if not self.currentconn or reconnect:
        if self.currentconn:
            self.currentconn.close()
        if self.ssl:
            currentconn = HTTPSConnectionWithValidation(
                self.server, self.port, timeout=timeout)
            if currentconn is None:
                LOG.error(_LE('ServerProxy: Could not establish HTTPS '
                              'connection'))
                return 0, None, None, None
            currentconn.combined_cert = self.combined_cert
        else:
            currentconn = httplib.HTTPConnection(
                self.server, self.port, timeout=timeout)
            if currentconn is None:
                LOG.error(_LE('ServerProxy: Could not establish HTTP '
                              'connection'))
                return 0, None, None, None

    try:
        currentconn.request(action, uri, body, headers)
        response = currentconn.getresponse()
        respstr = response.read()
        respdata = respstr
        if response.status in self.success_codes:
            hash_value = response.getheader(HASH_MATCH_HEADER)
            # don't clear hash from DB if a hash header wasn't present
            if hash_value is not None:
                # BVS-6979: race-condition(#1) set sync=false so that
                # keep_updating_thread doesn't squash updated HASH
                # Delay is required in case the loop is already executing
                if resource == TOPOLOGY_PATH:
                    self._topo_sync_in_progress = False
                    time.sleep(0.10)
                hash_handler.put_hash(hash_value)
            else:
                hash_handler.clear_lock()
            try:
                respdata = jsonutils.loads(respstr)
            except ValueError:
                # response was not JSON, ignore the exception
                pass
        else:
            # BVS-6979: race-condition(#2) on HashConflict, don't unlock
            # to ensure topo_sync is scheduled next (it force grabs lock)
            if response.status != httplib.CONFLICT:
                # release lock so others don't have to wait for timeout
                hash_handler.clear_lock()
        ret = (response.status, response.reason, respstr, respdata)
    except httplib.HTTPException:
        # If we were using a cached connection, try again with a new one.
        with excutils.save_and_reraise_exception() as ctxt:
            currentconn.close()
            if reconnect:
                # if reconnect is true, this was on a fresh connection so
                # reraise since this server seems to be broken
                ctxt.reraise = True
            else:
                # if reconnect is false, it was a cached connection so
                # try one more time before re-raising
                ctxt.reraise = False
                return self.rest_call(action, resource, data, headers,
                                      timeout=timeout, reconnect=True)
    except (socket.timeout, socket.error) as e:
        currentconn.close()
        LOG.error(_LE('ServerProxy: %(action)s failure, %(e)r'),
                  {'action': action, 'e': e})
        ret = 0, None, None, None
    LOG.debug("ServerProxy: status=%(status)d, reason=%(reason)r, "
              "ret=%(ret)s, data=%(data)r",
              {'status': ret[0], 'reason': ret[1],
               'ret': ret[2], 'data': ret[3]})
    return ret

def _bind_port_nfvswitch(self, context, segment, host_id):
    """Perform bind_port for nfvswitch.

    An NFV VM needs to be attached to a nfv-switch socket. So, during
    bind_port() we create an NFV VM endpoint on BCF, thereby reserving
    the socket for its use. Then pass the sock_path in the set_binding()
    for Nova to plug the VM into the nfv-switch.

    @param context: PortContext object
    """
    vif_type = portbindings.VIF_TYPE_VHOST_USER
    port = self._prepare_port_for_controller(context)
    if not port:
        LOG.warning(_LW("nfv-switch bind_port() skipped due to missing "
                        "Host ID."))
        return

    # Create an endpoint corresponding to the port on the Controller,
    # thereby asking the Controller to reserve a vhost_sock for it
    tenant_id = port["network"]["tenant_id"]
    network_id = port["network"]["id"]
    # Set vif_type to 'vhost_user' for the Controller to reserve vhost_sock
    port[portbindings.VIF_TYPE] = vif_type
    # Update host_id so that endpoint create will have the correct value
    port[portbindings.HOST_ID] = host_id
    try:
        self.async_port_create(tenant_id, network_id, port)
    except servermanager.RemoteRestError as e:
        with excutils.save_and_reraise_exception() as ctxt:
            if (cfg.CONF.RESTPROXY.auto_sync_on_failure and
                    e.status == httplib.NOT_FOUND and
                    servermanager.NXNETWORK in e.reason):
                ctxt.reraise = False
                LOG.error(_LE("Inconsistency with backend controller "
                              "triggering full synchronization."))
                self._send_all_data_auto(triggered_by_tenant=tenant_id)

    # Retrieve the vhost_socket reserved for the port (endpoint) by the
    # Controller and use it in set_binding()
    resp = self.servers.rest_get_port(tenant_id, network_id, port["id"])
    if not resp or not isinstance(resp, list):
        LOG.warning(_LW("Controller failed to reserve a nfv-switch sock"))
        return
    vhost_sock = None
    attachment_point = resp[0].get('attachment-point')
    if attachment_point:
        vhost_sock = attachment_point.get('interface')
    if not vhost_sock:
        LOG.warning(_LW("Controller failed to reserve a nfv-switch sock"))
        return

    vhost_sock_path = self._get_vhost_user_sock_path(vhost_sock)
    LOG.info(_LI('nfv-switch VM %(port)s allotted sock_path %(sock)s'),
             {'port': port['id'], 'sock': vhost_sock_path})

    # Update vif_details with host_id. This way, for all BCF
    # communications, we shall use it as HOST_ID (i.e. interface-group
    # on BCF)
    vif_details = {
        portbindings.CAP_PORT_FILTER: False,
        portbindings.VHOST_USER_MODE: portbindings.VHOST_USER_MODE_SERVER,
        portbindings.VHOST_USER_OVS_PLUG: False,
        portbindings.VHOST_USER_SOCKET: vhost_sock_path,
        VIF_DET_BSN_VSWITCH_HOST_ID: host_id
    }
    context.set_binding(segment[api.ID], vif_type, vif_details)