def test_force_rereg_with_incompatible_schema(self):
    '''PUT /tasr/subject/<subject>/force_register - incompatible schemas

    Switches the 'expose_force_register' config flag on, registers the base
    schema (expects 201, version 1), then PUTs an incompatible variant to
    the force_register endpoint and expects 201 with version 2.  The flag is
    put back to its prior value whether or not the assertions pass.
    '''
    mode = APP.config.mode
    saved_flag = APP.config.config.get(mode, 'expose_force_register')
    APP.config.config.set(mode, 'expose_force_register', 'True')
    try:
        first_resp = self.register_schema(self.event_type, self.schema_str)
        self.abort_diff_status(first_resp, 201)
        first_meta = SchemaHeaderBot.extract_metadata(first_resp)
        self.assertEqual(1, first_meta.group_version(self.event_type),
                         'bad ver')

        # an incompatible change: the timestamp field goes from long to int
        old_field = '{"name": "source__timestamp", "type": "long"}'
        new_field = '{"name": "source__timestamp", "type": "int"}'
        incompat_schema_str = self.schema_str.replace(old_field, new_field, 1)

        # a forced registration is expected to come back with a 201
        force_url = '%s/force_register' % self.subject_url
        forced_resp = self.tasr_app.request(
            force_url,
            method='PUT',
            content_type=self.content_type,
            expect_errors=False,
            body=incompat_schema_str)
        self.abort_diff_status(forced_resp, 201)
        forced_meta = SchemaHeaderBot.extract_metadata(forced_resp)
        self.assertEqual(2, forced_meta.group_version(self.event_type),
                         'bad ver')
    finally:
        # put expose_force_register back the way we found it
        APP.config.config.set(mode, 'expose_force_register', saved_flag)
def test_lookup_by_md5_id_str(self):
    '''GET /tasr/id/<id_str> - as expected

    Registers 49 permutations of the base schema, remembering the MD5 ID
    and response body of each, then fetches every one back by its MD5 ID
    and checks that both the ID header and the body match.
    '''
    registered = []  # (md5_id, canonicalized body) per registered version
    for ver in range(1, 50):
        permuted_str = self.get_schema_permutation(self.schema_str,
                                                   "fn_%s" % ver)
        reg_resp = self.register_schema(self.event_type, permuted_str)
        self.abort_diff_status(reg_resp, 201)
        reg_meta = SchemaHeaderBot.extract_metadata(reg_resp)
        # the body returned on registration has canonicalized whitespace
        registered.append((reg_meta.md5_id, reg_resp.body))

    # walk the registrations in order, requesting each one by its MD5 ID
    for md5_id, expected_body in registered:
        id_url = '%s/id/%s' % (self.url_prefix, md5_id)
        id_resp = self.tasr_app.request(id_url, method='GET')
        self.abort_diff_status(id_resp, 200)
        id_meta = SchemaHeaderBot.extract_metadata(id_resp)
        self.assertEqual(md5_id, id_meta.md5_id, 'bad ID')
        self.assertEqual(expected_body, id_resp.body,
                         u'Unexpected body: %s' % id_resp.body)
def test_reg_and_rereg(self):
    '''PUT /tasr/subject/<subject>/register - multiple calls, one schema

    The first registration is expected to return 201 and version 1; sending
    the identical schema again is expected to return 200 and still report
    version 1.
    '''
    subject = self.event_type

    first_resp = self.register_schema(subject, self.schema_str)
    self.abort_diff_status(first_resp, 201)
    first_meta = SchemaHeaderBot.extract_metadata(first_resp)
    self.assertEqual(1, first_meta.group_version(subject), 'bad ver')

    # same subject, same schema -- no new version should be created
    repeat_resp = self.register_schema(subject, self.schema_str)
    self.abort_diff_status(repeat_resp, 200)
    repeat_meta = SchemaHeaderBot.extract_metadata(repeat_resp)
    self.assertEqual(1, repeat_meta.group_version(subject), 'bad ver')
def test_reg_and_rereg(self):
    '''PUT /tasr/topic/<topic name> - multiple calls, same schema

    The first registration is expected to return 201 and version 1; the
    repeat registration of the same schema is expected to return 200 with
    the group version unchanged.
    '''
    topic = self.event_type

    first_resp = self.register_schema(self.schema_str)
    self.abort_diff_status(first_resp, 201)
    first_meta = SchemaHeaderBot.extract_metadata(first_resp)
    self.assertEqual(1, first_meta.group_version(topic), 'bad ver')

    # re-registering must hand back the version we already have
    repeat_resp = self.register_schema(self.schema_str)
    self.abort_diff_status(repeat_resp, 200)
    repeat_meta = SchemaHeaderBot.extract_metadata(repeat_resp)
    self.assertEqual(1, repeat_meta.group_version(topic),
                     'Re-reg produced a different group version.')
def test_multi_subject_reg(self):
    '''PUT /tasr/subject/<subject>/register - multi subjects, one schema

    Registers the same schema under the default subject and under a second
    subject ('bob'), expecting 201 and version 1 for each, then confirms
    the default subject's latest schema still reports version 1.
    '''
    primary_resp = self.register_schema(self.event_type, self.schema_str)
    self.abort_diff_status(primary_resp, 201)
    primary_meta = SchemaHeaderBot.extract_metadata(primary_resp)
    self.assertEqual(1, primary_meta.group_version(self.event_type),
                     'bad ver')

    alt_subject = 'bob'
    alt_resp = self.register_schema(alt_subject, self.schema_str)
    self.abort_diff_status(alt_resp, 201)
    alt_meta = SchemaHeaderBot.extract_metadata(alt_resp)
    self.assertEqual(1, alt_meta.group_version(alt_subject), 'bad ver')

    # the original subject's association must have survived the second reg
    latest_resp = self.tasr_app.get('%s/latest' % self.subject_url)
    latest_meta = SchemaHeaderBot.extract_metadata(latest_resp)
    self.assertEqual(1, latest_meta.group_version(self.event_type),
                     'lost reg')
def test_register_schema(self):
    '''PUT /tasr/topic/<topic name> - as expected

    A fresh registration is expected to return 201, and the response
    metadata should list the topic among its group names, with version 1
    and a truthy timestamp.
    '''
    topic = self.event_type
    reg_resp = self.register_schema(self.schema_str)
    self.abort_diff_status(reg_resp, 201)

    reg_meta = SchemaHeaderBot.extract_metadata(reg_resp)
    self.assertIn(topic, reg_meta.group_names, 'event_type missing')
    self.assertEqual(1, reg_meta.group_version(topic), 'bad ver')
    self.assertTrue(reg_meta.group_timestamp(topic), 'missing ts')
def test_get_for_topic_and_version_fail_on_bad_version(self):
    '''GET /tasr/topic/<topic name>/version/<version> - fail on bad ver

    Registers one schema (version 1), then asks for the version one past
    it and expects a 404.
    '''
    reg_resp = self.register_schema(self.schema_str)
    reg_meta = SchemaHeaderBot.extract_metadata(reg_resp)
    self.assertEqual(1, reg_meta.group_version(self.event_type), 'bad ver')

    # one past the only registered version
    missing_ver = reg_meta.group_version(self.event_type) + 1
    ver_url = "%s/version/%s" % (self.topic_url, missing_ver)
    ver_resp = self.tasr_app.request(ver_url,
                                     method='GET',
                                     expect_errors=True)
    self.abort_diff_status(ver_resp, 404)
def test_fail_lookup_for_subject_and_version_on_bad_version(self):
    '''GET /tasr/subject/<subject>/version/<version> - fail on bad ver

    Registers one schema (expects 201), then requests the version number
    one past the registered one and expects a 404.
    '''
    reg_resp = self.register_schema(self.event_type, self.schema_str)
    self.abort_diff_status(reg_resp, 201)
    reg_meta = SchemaHeaderBot.extract_metadata(reg_resp)

    # one past the version that was just registered
    missing_ver = reg_meta.group_version(self.event_type) + 1
    ver_url = '%s/version/%s' % (self.subject_url, missing_ver)
    ver_resp = self.tasr_app.request(ver_url,
                                     method='GET',
                                     expect_errors=True)
    self.abort_diff_status(ver_resp, 404)
def schema_for_schema_str(schema_str, object_on_miss=False,
                          host=TASR_HOST, port=TASR_PORT, timeout=TIMEOUT):
    ''' POST /tasr/schema

    In essence this is very similar to the schema_for_id_str, but with the
    calculation of the ID string being moved to the server.  That is, the
    client POSTs the schema JSON itself, the server canonicalizes it, then
    calculates the SHA256-based ID string for what was sent, then looks for
    a matching schema based on that ID string.  This allows clients that do
    not know how to canonicalize or hash the schemas to find the metadata
    (is it registered, what version does it have for a topic) with what
    they have.

    A RegisteredSchema object is returned if the schema string POSTed has
    been registered for one or more topics.

    If the schema string POSTed has yet to be registered for a topic and
    the object_on_miss flag is True, a RegisteredSchema calculated for the
    POSTed schema string is returned (it will have no topic-versions as
    there are none).  This provides an easy way for a client to get the ID
    strings to use for subsequent requests.

    If the object_on_miss flag is False (the default), then a request for a
    previously unregistered schema will raise a TASRError.

    Arguments:
        schema_str:     the schema JSON to POST as the request body
        object_on_miss: on a 404, return an object built from schema_str
                        and the response headers instead of raising
        host, port:     where the TASR service is listening
        timeout:        passed straight through to requests.post

    Returns a RegisteredAvroSchema populated from the response metadata.

    Raises TASRError for a missing response, and for any response that is
    neither a 200 nor (with object_on_miss set) a 404.
    '''
    url = 'http://%s:%s/tasr/schema' % (host, port)
    headers = {'content-type': 'application/json; charset=utf8', }
    resp = requests.post(url, data=schema_str, headers=headers,
                         timeout=timeout)
    if resp is None:
        raise TASRError('Timeout for request to %s' % url)

    if 200 == resp.status_code:
        # success -- return a normal reg schema, using the body the server
        # sent back (requests exposes the raw body as .content)
        ras = RegisteredAvroSchema()
        ras.schema_str = resp.content
        schema_meta = SchemaHeaderBot.extract_metadata(resp)
        ras.update_from_schema_metadata(schema_meta)
        return ras

    if 404 == resp.status_code and object_on_miss:
        # not registered -- hand back an object built from what we POSTed,
        # filled in from whatever metadata the 404 response carries
        ras = RegisteredAvroSchema()
        ras.schema_str = schema_str
        schema_meta = SchemaHeaderBot.extract_metadata(resp)
        ras.update_from_schema_metadata(schema_meta)
        return ras

    raise TASRError('Schema not registered to any topics.')
def test_lookup_by_sha256_id__accept_json(self):
    '''GET /tasr/id/<SHA256 ID> - "Accept: text/json" as expected

    Registers a schema, then fetches it by its SHA256 ID with an
    'Accept: text/json' header and expects a 200 whose body equals the
    body returned at registration.
    '''
    reg_resp = self.register_schema(self.event_type, self.schema_str)
    # what comes back on registration has normalized whitespace
    expected_body = reg_resp.body
    reg_meta = SchemaHeaderBot.extract_metadata(reg_resp)
    self.assertEqual(1, reg_meta.group_version(self.event_type), 'bad ver')

    id_url = "%s/id/%s" % (self.url_prefix, reg_meta.sha256_id)
    id_resp = self.tasr_app.request(id_url,
                                    method='GET',
                                    accept='text/json')
    self.abort_diff_status(id_resp, 200)
    self.assertEqual(expected_body, id_resp.body,
                     u'Unexpected body: %s' % id_resp.body)
def test_fail_lookup_by_schema_str_on_unreg_schema_str(self):
    '''POST /tasr/schema - fail on new schema string

    POSTs a schema that was never registered and expects a 404 whose
    metadata still carries both the SHA256 and MD5 IDs.
    '''
    lookup_url = "%s/schema" % self.url_prefix
    lookup_resp = self.tasr_app.request(lookup_url,
                                        method='POST',
                                        content_type=self.content_type,
                                        expect_errors=True,
                                        body=self.schema_str)
    self.assertEqual(404, lookup_resp.status_int,
                     u'Unexpected status: %s' % lookup_resp.status_int)

    lookup_meta = SchemaHeaderBot.extract_metadata(lookup_resp)
    self.assertTrue(lookup_meta.sha256_id, 'SHA missing')
    self.assertTrue(lookup_meta.md5_id, 'MD5 missing')
def test_get_latest_1(self):
    '''GET /tasr/topic/<topic name> - as expected

    Registers a schema, then GETs the topic URL and expects a 200 that
    reports version 1 and carries the same body returned at registration.
    '''
    reg_resp = self.register_schema(self.schema_str)
    # what comes back on registration has normalized whitespace
    expected_body = reg_resp.body

    # fetch it back through the topic URL
    topic_resp = self.tasr_app.request(self.topic_url, method='GET')
    self.abort_diff_status(topic_resp, 200)
    topic_meta = SchemaHeaderBot.extract_metadata(topic_resp)
    self.assertEqual(1, topic_meta.group_version(self.event_type),
                     'bad ver')
    self.assertEqual(expected_body, topic_resp.body,
                     u'Unexpected body: %s' % topic_resp.body)
def test_lookup_for_subject_and_version_on_stale_version(self):
    '''GET /tasr/subject/<subject>/version/<version> - 1 schema, 2 vers

    Registers the base schema, a permutation of it, and then the base
    schema again, expecting a 201 each time.  The latest version should
    then be 3, and version 1 should carry the same body as version 3.
    '''
    subject = self.event_type
    permuted_str = None
    for nth in (1, 2, 3):
        if nth == 2:
            # the middle registration is a permutation of the base schema
            permuted_str = self.get_schema_permutation(self.schema_str)
            to_register = permuted_str
        else:
            to_register = self.schema_str
        reg_resp = self.register_schema(subject, to_register)
        self.abort_diff_status(reg_resp, 201)

    # the latest version should now be 3
    latest_resp = self.tasr_app.get('%s/latest' % self.subject_url)
    self.abort_diff_status(latest_resp, 200)
    latest_meta = SchemaHeaderBot.extract_metadata(latest_resp)
    self.assertEqual(3, latest_meta.group_version(subject), 'bad ver')

    # version 1 should have the same body as version 3
    v1_url = '%s/version/%s' % (self.subject_url, 1)
    v1_resp = self.tasr_app.request(v1_url, method='GET')
    self.abort_diff_status(v1_resp, 200)
    v1_meta = SchemaHeaderBot.extract_metadata(v1_resp)
    self.assertEqual(1, v1_meta.group_version(subject), 'bad ver')
    self.assertEqual(latest_resp.body, v1_resp.body, 'schema body mismatch')
def test_get_for_stale_version(self):
    '''GET /tasr/topic/<topic name>/version/<version> - 1 schema, 2 vers

    Registers the base schema, an altered schema, and the base schema
    again (expected to land as version 3), then requests version 1 and
    expects the original body back with version 1 in the headers.
    '''
    first_resp = self.register_schema(self.schema_str)
    # what comes back on registration has normalized whitespace
    expected_body = first_resp.body
    self.abort_diff_status(first_resp, 201)

    altered_str = self.schema_str.replace('tagged.events',
                                          'tagged.events.alt', 1)
    second_resp = self.register_schema(altered_str)
    self.abort_diff_status(second_resp, 201)

    third_resp = self.register_schema(self.schema_str)
    third_meta = SchemaHeaderBot.extract_metadata(third_resp)
    self.assertEqual(3, third_meta.group_version(self.event_type),
                     'bad ver')

    # version 1 holds the same schema, but the headers report a diff ver
    v1_url = "%s/version/%s" % (self.topic_url, 1)
    v1_resp = self.tasr_app.request(v1_url,
                                    method='GET',
                                    expect_errors=True)
    self.abort_diff_status(v1_resp, 200)
    self.assertEqual(expected_body, v1_resp.body,
                     u'Unexpected body: %s' % v1_resp.body)
    v1_meta = SchemaHeaderBot.extract_metadata(v1_resp)
    self.assertEqual(1, v1_meta.group_version(self.event_type), 'bad ver')
def test_multi_topic_reg(self):
    '''PUT /tasr/topic/<topic name> - multiple group_names, same schema

    Registers the same schema for the default topic and for a second topic
    ('bob'), expecting 201 and version 1 for each, then fetches the schema
    by SHA256 ID and expects both topic versions in the metadata.
    '''
    primary_resp = self.register_schema(self.schema_str)
    self.abort_diff_status(primary_resp, 201)
    primary_meta = SchemaHeaderBot.extract_metadata(primary_resp)
    self.assertEqual(1, primary_meta.group_version(self.event_type),
                     'bad ver')

    alt_topic = 'bob'
    alt_url = '%s/topic/%s' % (self.url_prefix, alt_topic)
    alt_resp = self.tasr_app.request(alt_url,
                                     method='PUT',
                                     content_type=self.content_type,
                                     body=self.schema_str)
    self.abort_diff_status(alt_resp, 201)
    alt_meta = SchemaHeaderBot.extract_metadata(alt_resp)
    self.assertEqual(1, alt_meta.group_version(alt_topic), 'bad ver')

    # a lookup by ID reports every topic association in its headers
    id_url = "%s/id/%s" % (self.url_prefix, primary_meta.sha256_id)
    id_resp = self.tasr_app.request(id_url, method='GET')
    id_meta = SchemaHeaderBot.extract_metadata(id_resp)
    self.assertEqual(1, id_meta.group_version(self.event_type), 'bad ver')
    self.assertEqual(1, id_meta.group_version(alt_topic), 'bad ver')
def test_lookup_by_schema_str(self):
    '''POST /tasr/subject/<subject>/schema - as expected

    Registers the base schema and then a permutation of it, then POSTs the
    base schema to the subject's schema URL.  The reply should report
    version 1, the same SHA256 and MD5 IDs as the first registration, and
    the same body.
    '''
    first_resp = self.register_schema(self.event_type, self.schema_str)
    self.abort_diff_status(first_resp, 201)
    # registration hands back the canonicalized schema string
    expected_body = first_resp.body
    first_meta = SchemaHeaderBot.extract_metadata(first_resp)
    self.assertEqual(1, first_meta.group_version(self.event_type),
                     'bad ver')

    permuted_str = self.get_schema_permutation(self.schema_str)
    second_resp = self.register_schema(self.event_type, permuted_str)
    self.abort_diff_status(second_resp, 201)

    # look the first schema up by POSTing its string
    lookup_url = "%s/schema" % self.subject_url
    lookup_resp = self.tasr_app.request(lookup_url,
                                        method='POST',
                                        content_type=self.content_type,
                                        body=self.schema_str)
    lookup_meta = SchemaHeaderBot.extract_metadata(lookup_resp)
    self.assertEqual(1, lookup_meta.group_version(self.event_type),
                     'bad ver')
    self.assertEqual(first_meta.sha256_id, lookup_meta.sha256_id,
                     'SHA mismatch')
    self.assertEqual(first_meta.md5_id, lookup_meta.md5_id, 'MD5 mismatch')
    self.assertEqual(expected_body, lookup_resp.body,
                     u'Unexpected body: %s' % lookup_resp.body)
def test_reg_if_latest(self):
    '''PUT /tasr/subject/<subject name>/register_if_latest/<version>

    As expected, we reference the version number of the latest version.
    Registers a schema, reads its version from the response, then PUTs a
    permutation to register_if_latest using that version; expects a 201.
    '''
    reg_resp = self.register_schema(self.event_type, self.schema_str)
    self.abort_diff_status(reg_resp, 201)
    latest_ver = SchemaHeaderBot.extract_metadata(reg_resp).group_version(
        self.event_type)

    permuted_str = self.get_schema_permutation(self.schema_str)
    cond_url = '%s/register_if_latest/%s' % (self.subject_url, latest_ver)
    cond_resp = self.tasr_app.request(cond_url,
                                      method='PUT',
                                      content_type=self.content_type,
                                      body=permuted_str)
    self.abort_diff_status(cond_resp, 201)
def test_fail_reg_if_latest_bad_ver(self):
    '''PUT /tasr/subject/<subject name>/register_if_latest/<version>

    Should fail as version number is non-existent.  Registers a schema,
    then PUTs a permutation to register_if_latest using the version one
    past the registered one; expects a 409.
    '''
    reg_resp = self.register_schema(self.event_type, self.schema_str)
    self.abort_diff_status(reg_resp, 201)
    reg_meta = SchemaHeaderBot.extract_metadata(reg_resp)

    # one past the version that was just registered
    missing_ver = reg_meta.group_version(self.event_type) + 1
    permuted_str = self.get_schema_permutation(self.schema_str)
    cond_url = '%s/register_if_latest/%s' % (self.subject_url, missing_ver)
    cond_resp = self.tasr_app.request(cond_url,
                                      method='PUT',
                                      content_type=self.content_type,
                                      expect_errors=True,
                                      body=permuted_str)
    self.abort_diff_status(cond_resp, 409)
def test_fail_lookup_by_schema_str_on_unreg_schema_str__accept_json(self):
    '''POST /tasr/schema - fail on new schema string

    Same as the plain unregistered-schema lookup, but sent with
    'Accept: text/json': expects a 404 carrying SHA256 and MD5 IDs in its
    metadata, and a JSON body whose "status_code" entry is 404.
    '''
    lookup_url = "%s/schema" % self.url_prefix
    lookup_resp = self.tasr_app.request(lookup_url,
                                        method='POST',
                                        content_type=self.content_type,
                                        accept='text/json',
                                        expect_errors=True,
                                        body=self.schema_str)
    self.assertEqual(404, lookup_resp.status_int,
                     u'Unexpected status: %s' % lookup_resp.status_int)

    lookup_meta = SchemaHeaderBot.extract_metadata(lookup_resp)
    self.assertTrue(lookup_meta.sha256_id, 'SHA missing')
    self.assertTrue(lookup_meta.md5_id, 'MD5 missing')

    # the error should arrive as JSON; json.loads raises if it did not
    error_doc = json.loads(lookup_resp.body)
    self.assertEqual(404, error_doc["status_code"], "expected a 404")
def test_lookup_by_sha256_id_str__bad_id(self):
    '''GET /tasr/id/<id str> - fail on bad ID

    Registers a schema, computes the SHA256 ID of a different schema
    string that was never registered, and expects a 404 when that ID is
    looked up.
    '''
    reg_resp = self.register_schema(self.event_type, self.schema_str)
    self.abort_diff_status(reg_resp, 201)
    registered_id = SchemaHeaderBot.extract_metadata(reg_resp).sha256_id

    # derive a "bad" ID from a schema string we did not register
    unregistered = tasr.registered_schema.RegisteredSchema()
    unregistered.schema_str = self.schema_str.replace('tagged.events', 'bob')
    unknown_id = unregistered.sha256_id
    self.assertNotEqual(registered_id, unknown_id, 'IDs should differ')

    # a lookup on the "bad" ID should miss
    id_url = '%s/id/%s' % (self.url_prefix, unknown_id)
    id_resp = self.tasr_app.request(id_url,
                                    method='GET',
                                    expect_errors=True)
    self.abort_diff_status(id_resp, 404)
def test_fail_rereg_with_incompatible_schema(self):
    '''PUT /tasr/subject/<subject>/register - incompatible schemas

    Registers the base schema (expects 201, version 1), then PUTs an
    incompatible variant to the ordinary register endpoint and expects a
    409.
    '''
    first_resp = self.register_schema(self.event_type, self.schema_str)
    self.abort_diff_status(first_resp, 201)
    first_meta = SchemaHeaderBot.extract_metadata(first_resp)
    self.assertEqual(1, first_meta.group_version(self.event_type),
                     'bad ver')

    # an incompatible change: the timestamp field goes from long to int
    old_field = '{"name": "source__timestamp", "type": "long"}'
    new_field = '{"name": "source__timestamp", "type": "int"}'
    incompat_schema_str = self.schema_str.replace(old_field, new_field, 1)

    # a plain registration of the incompatible schema should give a 409
    register_url = '%s/register' % self.subject_url
    conflict_resp = self.tasr_app.request(register_url,
                                          method='PUT',
                                          content_type=self.content_type,
                                          expect_errors=True,
                                          body=incompat_schema_str)
    self.abort_diff_status(conflict_resp, 409)
def reg_schema_from_url(url, method='GET', data=None, headers=None,
                        timeout=TIMEOUT, err_404='No such object.'):
    '''A generic method to call a URL and transform the reply into a
    RegisteredSchema object.  Most of the API calls can use this skeleton.

    Arguments:
        url:      the full URL to call
        method:   'GET', 'POST' or 'PUT' (case-insensitive)
        data:     request body for POST and PUT
        headers:  optional dict of request headers for POST and PUT; an
                  'Accept: application/json' entry is added to a copy of
                  it, so the caller's dict is left untouched
        timeout:  passed straight through to requests
        err_404:  message for the TASRError raised on a 404

    Returns a RegisteredAvroSchema whose schema_str is the response body,
    whose created flag is True only for a 201, and whose remaining fields
    are filled in from the response metadata.

    Raises TASRError on a 404 (err_404), a 409 (the response body), any
    other status outside 200/201, an unsupported method, a SHA256 mismatch
    between the response metadata and the body received, or any other
    exception raised along the way (wrapped in a TASRError).
    '''
    if headers is None:
        headers = {'Accept': 'application/json', }
    elif isinstance(headers, dict):
        # copy first so the caller's dict is not modified behind its back
        headers = headers.copy()
        headers['Accept'] = 'application/json'

    try:
        verb = method.upper()
        if verb == 'GET':
            # NOTE(review): headers are not sent on GET; this is kept as it
            # was -- confirm whether the Accept header was meant to apply.
            resp = requests.get(url, timeout=timeout)
        elif verb == 'POST':
            resp = requests.post(url, data=data, headers=headers,
                                 timeout=timeout)
        elif verb == 'PUT':
            resp = requests.put(url, data=data, headers=headers,
                                timeout=timeout)
        else:
            raise TASRError('Unsupported method: %s' % method)

        # check for error cases
        if resp is None:
            raise TASRError('Timeout for request to %s' % url)
        if 404 == resp.status_code:
            raise TASRError(err_404)
        if 409 == resp.status_code:
            raise TASRError(resp.content)
        if resp.status_code not in (200, 201):
            raise TASRError('Failed request to %s (status code: %s)' %
                            (url, resp.status_code))

        # OK - so construct the RS and return it
        schema_str = resp.content
        ras = RegisteredAvroSchema()
        ras.schema_str = schema_str
        ras.created = (resp.status_code == 201)
        schema_meta = SchemaHeaderBot.extract_metadata(resp)
        # the ID in the metadata must match the ID of the body we received
        if schema_str and schema_meta.sha256_id != ras.sha256_id:
            raise TASRError('Schema was modified in transit.')
        ras.update_from_schema_metadata(schema_meta)
        return ras
    except TASRError:
        # already the right type -- don't bury it inside another TASRError
        raise
    except Exception as exc:
        raise TASRError(exc)
def test_lookup_by_sha256_id_str__accept_json__bad_id(self):
    '''GET /tasr/id/<id str> - "Accept: text/json" fail on bad ID

    Registers a schema, computes the SHA256 ID of a different schema
    string that was never registered, and looks that ID up with
    'Accept: text/json'.  Expects a 404 with a JSON body whose
    "status_code" entry is 404.
    '''
    reg_resp = self.register_schema(self.event_type, self.schema_str)
    self.abort_diff_status(reg_resp, 201)
    registered_id = SchemaHeaderBot.extract_metadata(reg_resp).sha256_id

    # derive a "bad" ID from a schema string we did not register
    unregistered = tasr.registered_schema.RegisteredSchema()
    unregistered.schema_str = self.schema_str.replace('tagged.events', 'bob')
    unknown_id = unregistered.sha256_id
    self.assertNotEqual(registered_id, unknown_id, 'IDs should differ')

    # a lookup on the "bad" ID should miss
    id_url = '%s/id/%s' % (self.url_prefix, unknown_id)
    id_resp = self.tasr_app.request(id_url,
                                    method='GET',
                                    accept='text/json',
                                    expect_errors=True)
    self.abort_diff_status(id_resp, 404)

    # the error should arrive as JSON; json.loads raises if it did not
    error_doc = json.loads(id_resp.body)
    self.assertEqual(404, error_doc["status_code"], "expected a 404")
def test_lookup_latest(self):
    '''GET /tasr/subject/<subject name>/latest

    Expects a 404 before anything is registered.  After registering the
    base schema and a permutation of it (201 each), the latest lookup
    should return 200 and report version 2.
    '''
    latest_url = '%s/latest' % self.subject_url

    # nothing has been registered yet, so the lookup should miss
    empty_resp = self.tasr_app.request(latest_url,
                                       method='GET',
                                       expect_errors=True)
    self.abort_diff_status(empty_resp, 404)

    # register something to look up
    first_resp = self.register_schema(self.event_type, self.schema_str)
    self.abort_diff_status(first_resp, 201)

    # and a second schema, so that version 1 becomes the stale one
    permuted_str = self.get_schema_permutation(self.schema_str)
    second_resp = self.register_schema(self.event_type, permuted_str)
    self.abort_diff_status(second_resp, 201)

    # the lookup has to return the _latest_ schema
    latest_resp = self.tasr_app.get(latest_url)
    self.abort_diff_status(latest_resp, 200)
    latest_meta = SchemaHeaderBot.extract_metadata(latest_resp)
    self.assertEqual(2, latest_meta.group_version(self.event_type),
                     'bad ver')
def test_all_subject_ids(self):
    '''GET /tasr/subject/<subject>/all_ids - gets schema IDs for all
    versions of the subject, in order, one per line in the response body.

    Registers 49 permutations of the base schema, collecting the SHA256 ID
    of each, then compares that list with the stripped lines of the
    all_ids response body.
    '''
    expected_ids = []
    # register a run of versions for our subject
    for ver in range(1, 50):
        permuted_str = self.get_schema_permutation(self.schema_str,
                                                   "fn_%s" % ver)
        reg_resp = self.register_schema(self.event_type, permuted_str)
        self.abort_diff_status(reg_resp, 201)
        reg_meta = SchemaHeaderBot.extract_metadata(reg_resp)
        expected_ids.append(reg_meta.sha256_id)

    ids_resp = self.tasr_app.get('%s/all_ids' % self.subject_url)

    # read the body line by line, one ID per line
    body_lines = StringIO.StringIO(ids_resp.body)
    returned_ids = [id_line.strip() for id_line in body_lines]
    body_lines.close()

    self.assertListEqual(expected_ids, returned_ids, 'Bad ID list.')
def test_lookup_by_subject_and_version(self):
    '''GET /tasr/subject/<subject>/version/<version> - as expected

    Registers 49 permutations of the base schema, keeping each response
    body, then requests versions 1 through 49 in turn and checks the
    reported version number and the body of each.
    '''
    expected_bodies = []
    # register a run of versions for our subject
    for ver in range(1, 50):
        permuted_str = self.get_schema_permutation(self.schema_str,
                                                   "fn_%s" % ver)
        reg_resp = self.register_schema(self.event_type, permuted_str)
        self.abort_diff_status(reg_resp, 201)
        # the body returned on registration has canonicalized whitespace
        expected_bodies.append(reg_resp.body)

    # versions are numbered from 1, in registration order
    for ver, expected_body in enumerate(expected_bodies, 1):
        ver_url = '%s/version/%s' % (self.subject_url, ver)
        ver_resp = self.tasr_app.request(ver_url, method='GET')
        self.abort_diff_status(ver_resp, 200)
        ver_meta = SchemaHeaderBot.extract_metadata(ver_resp)
        self.assertEqual(ver, ver_meta.group_version(self.event_type),
                         'bad ver')
        self.assertEqual(expected_body, ver_resp.body,
                         u'Unexpected body: %s' % ver_resp.body)