def test_clear_lock_skip_after_steal(self):
    """A handler whose lock was stolen must not clear the thief's lock."""
    owner = consistency_db.HashHandler()
    owner.read_for_update()  # take the table lock
    thief = consistency_db.HashHandler()
    # a zero wait time makes the second handler steal the lock immediately
    with mock.patch.object(consistency_db, 'MAX_LOCK_WAIT_TIME', new=0):
        thief.read_for_update()
    before = self._get_hash_from_handler_db(owner)
    # the original holder must leave the stolen lock untouched
    owner.clear_lock()
    self.assertEqual(before, self._get_hash_from_handler_db(owner))
def test_failure_to_steal_lock(self):
    """A failed optimistic lock steal is retried until it succeeds.

    handler2's first `_optimistic_update_hash_record` attempt returns
    False, forcing a second wait/steal iteration before it gets the lock.
    """
    handler1 = consistency_db.HashHandler()
    handler1.read_for_update()  # lock the table
    handler2 = consistency_db.HashHandler()
    # contextlib.nested is deprecated since Python 2.7 and removed in
    # Python 3; a single multi-manager 'with' statement is the supported
    # equivalent and behaves identically here.
    with mock.patch.object(consistency_db, 'MAX_LOCK_WAIT_TIME') as mlock, \
            mock.patch.object(handler2, '_optimistic_update_hash_record',
                              side_effect=[False, True]) as oplock:
        # handler2 will go through 2 iterations since the lock will fail on
        # the first attempt
        mlock.__lt__.side_effect = [False, True, False, True]
        handler2.read_for_update()
        self.assertEqual(4, mlock.__lt__.call_count)
        self.assertEqual(2, oplock.call_count)
def test_take_lock_from_other(self):
    """Once MAX_LOCK_WAIT_TIME expires, a waiting handler steals the lock."""
    holder = consistency_db.HashHandler()
    holder.read_for_update()  # lock the table
    waiter = consistency_db.HashHandler()
    with mock.patch.object(consistency_db, 'MAX_LOCK_WAIT_TIME') as mlock:
        # make the waiter wait for only one iteration
        mlock.__lt__.side_effect = [False, True]
        waiter.read_for_update()
        # once MAX LOCK exceeded, comparisons should stop due to lock steal
        self.assertEqual(2, mlock.__lt__.call_count)
    dbentry = self._get_hash_from_handler_db(holder)
    # the waiter should now own the lock
    self.assertIn(waiter.lock_marker, dbentry)
    self.assertNotIn(holder.lock_marker, dbentry)
    # lock protection only blocks read_for_update, anyone can change
    holder.put_hash('H1')
def test_clear_lock(self):
    """clear_lock strips only the lock prefix, keeping the stored hash."""
    handler = consistency_db.HashHandler()
    handler.put_hash('SOMEHASH')
    handler.read_for_update()  # lock the table
    locked_entry = self._get_hash_from_handler_db(handler)
    self.assertEqual(handler.lock_marker + 'SOMEHASH', locked_entry)
    handler.clear_lock()
    self.assertEqual('SOMEHASH', self._get_hash_from_handler_db(handler))
def test_handler_already_holding_lock(self):
    """Re-locking by the current lock owner must not touch the database."""
    handler = consistency_db.HashHandler()
    handler.read_for_update()  # lock the table
    with mock.patch.object(handler._FACADE, 'get_engine') as engine_mock:
        handler.read_for_update()
        # no update should have been made, so no engine was requested
        self.assertFalse(engine_mock.called)
def test_db_duplicate_on_insert(self):
    """A duplicate-entry failure on the initial insert triggers a retry."""
    handler = consistency_db.HashHandler()
    add_patch = mock.patch.object(
        handler.session, 'add',
        side_effect=[db_exc.DBDuplicateEntry, ''])
    with add_patch as add_mock:
        handler.read_for_update()
        # duplicate insert failure should result in retry
        self.assertEqual(2, add_mock.call_count)
def test_delete_failure_sets_bad_hash(self):
    """A DELETE that fails on the backend must poison the consistency hash."""
    plugin = NeutronManager.get_plugin()
    hash_handler = cdb.HashHandler()
    failure = (httplib.INTERNAL_SERVER_ERROR, 0, 0, 0)
    with mock.patch(SERVERMANAGER + '.ServerProxy.rest_call',
                    return_value=failure):
        # a failed delete call should put a bad hash in the DB
        plugin.servers.rest_call('DELETE', '/', '', None, [])
        self.assertEqual('INCONSISTENT,INCONSISTENT',
                         hash_handler.read_for_update())
def test_hash_handle_lock_no_initial_record(self):
    """Locking an empty table returns '' and stores only the lock marker."""
    handler = consistency_db.HashHandler()
    initial = handler.read_for_update()
    # return to caller should be empty even with lock in DB
    self.assertFalse(initial)
    # db should have a lock marker
    self.assertEqual(handler.lock_marker,
                     self._get_hash_from_handler_db(handler))
    # an entry should clear the lock
    handler.put_hash('DIGEST')
    self.assertEqual('DIGEST', self._get_hash_from_handler_db(handler))
def test_hash_handle_lock_existing_record(self):
    """Locking returns the stored hash and prefixes it with the marker."""
    handler = consistency_db.HashHandler()
    handler.put_hash('DIGEST')  # set initial hash
    self.assertEqual('DIGEST', handler.read_for_update())
    self.assertEqual(handler.lock_marker + 'DIGEST',
                     self._get_hash_from_handler_db(handler))
    # make sure update works
    handler.put_hash('DIGEST2')
    self.assertEqual('DIGEST2', self._get_hash_from_handler_db(handler))
def rest_call(self, action, resource, data, headers, ignore_codes,
              timeout=False):
    """Dispatch a REST call across the server pool, healthiest first.

    Servers that previously failed are tried last.  A CONFLICT response
    clears the stored consistency hash and pushes a full topology sync to
    that server before the result is evaluated.  Returns the successful
    response tuple, or the first server's response when all servers fail.
    """
    hash_handler = cdb.HashHandler(context=self.get_context_ref())
    # servers sort to the front while their 'failed' flag is False
    good_first = sorted(self.servers, key=lambda x: x.failed)
    first_response = None
    for active_server in good_first:
        ret = active_server.rest_call(action, resource, data, headers,
                                      timeout,
                                      reconnect=self.always_reconnect,
                                      hash_handler=hash_handler)
        # If inconsistent, do a full synchronization
        if ret[0] == httplib.CONFLICT:
            if not self.get_topo_function:
                raise cfg.Error(_('Server requires synchronization, '
                                  'but no topology function was defined.'))
            # The hash was incorrect so it needs to be removed
            hash_handler.put_hash('')
            data = self.get_topo_function(**self.get_topo_function_args)
            active_server.rest_call('PUT', TOPOLOGY_PATH, data,
                                    timeout=None)
        # Store the first response as the error to be bubbled up to the
        # user since it was a good server. Subsequent servers will most
        # likely be cluster slaves and won't have a useful error for the
        # user (e.g. 302 redirect to master)
        if not first_response:
            first_response = ret
        if not self.server_failure(ret, ignore_codes):
            active_server.failed = False
            return ret
        else:
            LOG.error(_('ServerProxy: %(action)s failure for servers: '
                        '%(server)r Response: %(response)s'),
                      {'action': action,
                       'server': (active_server.server,
                                  active_server.port),
                       'response': ret[3]})
            LOG.error(_("ServerProxy: Error details: status=%(status)d, "
                        "reason=%(reason)r, ret=%(ret)s, data=%(data)r"),
                      {'status': ret[0], 'reason': ret[1], 'ret': ret[2],
                       'data': ret[3]})
            active_server.failed = True
    # All servers failed, reset server list and try again next time
    LOG.error(_('ServerProxy: %(action)s failure for all servers: '
                '%(server)r'),
              {'action': action,
               'server': tuple((s.server, s.port)
                               for s in self.servers)})
    return first_response
def test_update_hit_no_records(self):
    """An UPDATE matching zero rows is retried until it takes effect."""
    handler = consistency_db.HashHandler()
    # set initial hash so update will be required
    handler.put_hash('DIGEST')
    with mock.patch.object(handler._FACADE, 'get_engine') as engine_mock:
        conn = (engine_mock.return_value.begin.return_value.
                __enter__.return_value)
        # a rowcount of 0 simulates the effect of another db client
        # updating the same record the handler was trying to update
        missed = mock.Mock()
        missed.rowcount = 0
        landed = mock.Mock()
        landed.rowcount = 1
        conn.execute.side_effect = [missed, landed]
        handler.read_for_update()
        # update should have been called again after the failure
        self.assertEqual(2, conn.execute.call_count)
def rest_call(self, action, resource, data, headers, ignore_codes,
              timeout=False):
    """Dispatch a REST call across the server pool, healthiest first.

    Forwards the requesting context (minus the auth token) to the
    backend, retries each server on SERVICE_UNAVAILABLE, triggers a full
    topology sync on CONFLICT, and poisons the consistency hash when a
    DELETE fails on every server.  Returns the successful response tuple,
    or the first server's response when all servers fail.
    """
    context = self.get_context_ref()
    if context:
        # include the requesting context information if available
        cdict = context.to_dict()
        # remove the auth token so it's not present in debug logs on the
        # backend controller
        cdict.pop('auth_token', None)
        headers[REQ_CONTEXT_HEADER] = jsonutils.dumps(cdict)
    hash_handler = cdb.HashHandler()
    # servers sort to the front while their 'failed' flag is False
    good_first = sorted(self.servers, key=lambda x: x.failed)
    first_response = None
    for active_server in good_first:
        # retry a server that reports itself temporarily unavailable
        for x in range(HTTP_SERVICE_UNAVAILABLE_RETRY_COUNT + 1):
            ret = active_server.rest_call(action, resource, data,
                                          headers, timeout,
                                          reconnect=self.always_reconnect,
                                          hash_handler=hash_handler)
            if ret[0] != httplib.SERVICE_UNAVAILABLE:
                break
            time.sleep(HTTP_SERVICE_UNAVAILABLE_RETRY_INTERVAL)
        # If inconsistent, do a full synchronization
        if ret[0] == httplib.CONFLICT:
            if not self.get_topo_function:
                raise cfg.Error(
                    _('Server requires synchronization, '
                      'but no topology function was defined.'))
            data = self.get_topo_function(**self.get_topo_function_args)
            active_server.rest_call('PUT', TOPOLOGY_PATH, data,
                                    timeout=None)
        # Store the first response as the error to be bubbled up to the
        # user since it was a good server. Subsequent servers will most
        # likely be cluster slaves and won't have a useful error for the
        # user (e.g. 302 redirect to master)
        if not first_response:
            first_response = ret
        if not self.server_failure(ret, ignore_codes):
            active_server.failed = False
            return ret
        else:
            LOG.error(
                _('ServerProxy: %(action)s failure for servers: '
                  '%(server)r Response: %(response)s'),
                {'action': action,
                 'server': (active_server.server, active_server.port),
                 'response': ret[3]})
            LOG.error(
                _("ServerProxy: Error details: status=%(status)d, "
                  "reason=%(reason)r, ret=%(ret)s, data=%(data)r"),
                {'status': ret[0], 'reason': ret[1], 'ret': ret[2],
                 'data': ret[3]})
            active_server.failed = True
    # A failure on a delete means the object is gone from Neutron but not
    # from the controller. Set the consistency hash to a bad value to
    # trigger a sync on the next check.
    # NOTE: The hash must have a comma in it otherwise it will be ignored
    # by the backend.
    if action == 'DELETE':
        hash_handler.put_hash('INCONSISTENT,INCONSISTENT')
    # All servers failed, reset server list and try again next time
    LOG.error(
        _('ServerProxy: %(action)s failure for all servers: '
          '%(server)r'),
        {'action': action,
         'server': tuple((s.server, s.port) for s in self.servers)})
    return first_response
def rest_call(self, action, resource, data='', headers=None,
              timeout=False, reconnect=False, hash_handler=None):
    """Issue a single HTTP(S) request to this backend server.

    Sets the standard proxy headers, manages the consistency-hash header
    and its DB lock, and (re)establishes the cached connection as needed.
    Returns a (status, reason, raw_body, parsed_body) tuple; a status of
    0 indicates a transport-level failure.  On an HTTPException over a
    cached connection the call is retried once on a fresh connection.
    """
    uri = self.base_uri + resource
    body = jsonutils.dumps(data)
    headers = headers or {}
    headers['Content-type'] = 'application/json'
    headers['Accept'] = 'application/json'
    headers['NeutronProxy-Agent'] = self.name
    headers['Instance-ID'] = self.neutron_id
    headers['Orchestration-Service-ID'] = ORCHESTRATION_SERVICE_ID
    if hash_handler:
        # this will be excluded on calls that don't need hashes
        # (e.g. topology sync, capability checks)
        headers[HASH_MATCH_HEADER] = hash_handler.read_for_update()
    else:
        hash_handler = cdb.HashHandler()
    if 'keep-alive' in self.capabilities:
        headers['Connection'] = 'keep-alive'
    else:
        # without keep-alive support every call needs a fresh connection
        reconnect = True
    if self.auth:
        headers['Authorization'] = self.auth
    LOG.debug(
        _("ServerProxy: server=%(server)s, port=%(port)d, "
          "ssl=%(ssl)r"),
        {'server': self.server, 'port': self.port, 'ssl': self.ssl})
    LOG.debug(
        _("ServerProxy: resource=%(resource)s, data=%(data)r, "
          "headers=%(headers)r, action=%(action)s"),
        {'resource': resource, 'data': data, 'headers': headers,
         'action': action})
    # unspecified timeout is False because a timeout can be specified as
    # None to indicate no timeout.
    if timeout is False:
        timeout = self.timeout
    if timeout != self.timeout:
        # need a new connection if timeout has changed
        reconnect = True
    if not self.currentconn or reconnect:
        if self.currentconn:
            self.currentconn.close()
        if self.ssl:
            self.currentconn = HTTPSConnectionWithValidation(
                self.server, self.port, timeout=timeout)
            if self.currentconn is None:
                LOG.error(
                    _('ServerProxy: Could not establish HTTPS '
                      'connection'))
                return 0, None, None, None
            self.currentconn.combined_cert = self.combined_cert
        else:
            self.currentconn = httplib.HTTPConnection(self.server,
                                                      self.port,
                                                      timeout=timeout)
            if self.currentconn is None:
                LOG.error(
                    _('ServerProxy: Could not establish HTTP '
                      'connection'))
                return 0, None, None, None
    try:
        self.currentconn.request(action, uri, body, headers)
        response = self.currentconn.getresponse()
        respstr = response.read()
        respdata = respstr
        if response.status in self.success_codes:
            hash_value = response.getheader(HASH_MATCH_HEADER)
            # don't clear hash from DB if a hash header wasn't present
            if hash_value is not None:
                hash_handler.put_hash(hash_value)
            else:
                hash_handler.clear_lock()
            try:
                respdata = jsonutils.loads(respstr)
            except ValueError:
                # response was not JSON, ignore the exception
                pass
        else:
            # release lock so others don't have to wait for timeout
            hash_handler.clear_lock()
        ret = (response.status, response.reason, respstr, respdata)
    except httplib.HTTPException:
        # If we were using a cached connection, try again with a new one.
        with excutils.save_and_reraise_exception() as ctxt:
            self.currentconn.close()
            if reconnect:
                # if reconnect is true, this was on a fresh connection so
                # reraise since this server seems to be broken
                ctxt.reraise = True
            else:
                # if reconnect is false, it was a cached connection so
                # try one more time before re-raising
                ctxt.reraise = False
                return self.rest_call(action, resource, data, headers,
                                      timeout=timeout, reconnect=True)
    except (socket.timeout, socket.error) as e:
        self.currentconn.close()
        LOG.error(_('ServerProxy: %(action)s failure, %(e)r'),
                  {'action': action, 'e': e})
        ret = 0, None, None, None
    LOG.debug(
        _("ServerProxy: status=%(status)d, reason=%(reason)r, "
          "ret=%(ret)s, data=%(data)r"),
        {'status': ret[0], 'reason': ret[1], 'ret': ret[2],
         'data': ret[3]})
    return ret
def rest_call(self, action, resource, data, headers, ignore_codes,
              timeout=False):
    """Dispatch a REST call across the server pool, healthiest first.

    Forwards the requesting context to the backend, retries each server
    on SERVICE_UNAVAILABLE, and triggers a full topology sync on
    CONFLICT.  Returns the successful response tuple, or the first
    server's response when all servers fail.
    """
    context = self.get_context_ref()
    if context:
        # include the requesting context information if available
        cdict = context.to_dict()
        headers[REQ_CONTEXT_HEADER] = json.dumps(cdict)
    hash_handler = cdb.HashHandler(context=context)
    # servers sort to the front while their 'failed' flag is False
    good_first = sorted(self.servers, key=lambda x: x.failed)
    first_response = None
    for active_server in good_first:
        # retry a server that reports itself temporarily unavailable
        for x in range(HTTP_SERVICE_UNAVAILABLE_RETRY_COUNT + 1):
            ret = active_server.rest_call(action, resource, data,
                                          headers, timeout,
                                          reconnect=self.always_reconnect,
                                          hash_handler=hash_handler)
            if ret[0] != httplib.SERVICE_UNAVAILABLE:
                break
            time.sleep(HTTP_SERVICE_UNAVAILABLE_RETRY_INTERVAL)
        # If inconsistent, do a full synchronization
        if ret[0] == httplib.CONFLICT:
            if not self.get_topo_function:
                raise cfg.Error(
                    _('Server requires synchronization, '
                      'but no topology function was defined.'))
            data = self.get_topo_function(**self.get_topo_function_args)
            active_server.rest_call('PUT', TOPOLOGY_PATH, data,
                                    timeout=None)
        # Store the first response as the error to be bubbled up to the
        # user since it was a good server. Subsequent servers will most
        # likely be cluster slaves and won't have a useful error for the
        # user (e.g. 302 redirect to master)
        if not first_response:
            first_response = ret
        if not self.server_failure(ret, ignore_codes):
            active_server.failed = False
            return ret
        else:
            LOG.error(
                _('ServerProxy: %(action)s failure for servers: '
                  '%(server)r Response: %(response)s'),
                {'action': action,
                 'server': (active_server.server, active_server.port),
                 'response': ret[3]})
            LOG.error(
                _("ServerProxy: Error details: status=%(status)d, "
                  "reason=%(reason)r, ret=%(ret)s, data=%(data)r"),
                {'status': ret[0], 'reason': ret[1], 'ret': ret[2],
                 'data': ret[3]})
            active_server.failed = True
    # All servers failed, reset server list and try again next time
    LOG.error(
        _('ServerProxy: %(action)s failure for all servers: '
          '%(server)r'),
        {'action': action,
         'server': tuple((s.server, s.port) for s in self.servers)})
    return first_response