def test_probe_waste_adding_one_server(self):
    """Check how far the original first probe moves when services are added.

    Probe orders are recorded for 100 block hashes against 12 mocked
    services.  For each of 1..11 additional services, the position of
    every block's original first-choice service in the new probe order
    (its "penalty") must not exceed the number of added services, and
    the summed penalty must lie within the computed min/max bounds.
    """
    initial_services = 12
    hashes = [
        hashlib.md5("{:064x}".format(n)).hexdigest() for n in range(100)]
    baseline_client = arvados.KeepClient(
        api_client=self.mock_keep_services(count=initial_services))
    probes_before = [
        baseline_client.weighted_service_roots(digest) for digest in hashes]

    for added_services in range(1, 12):
        grown_api = self.mock_keep_services(
            count=initial_services + added_services)
        grown_client = arvados.KeepClient(api_client=grown_api)
        total_penalty = 0
        for digest, old_probe in zip(hashes, probes_before):
            new_probe = grown_client.weighted_service_roots(digest)
            # How many services now precede the former first choice.
            penalty = new_probe.index(old_probe[0])
            self.assertLessEqual(penalty, added_services)
            total_penalty += penalty

        # The ideal total is N(added) * N(blocks) / N(orig).  The upper
        # bound starts just under 20% above that and tightens as more
        # services are added; the lower bound is 80% of it.
        expect_penalty = added_services * len(hashes) / initial_services
        max_penalty = expect_penalty * (120 - added_services) / 100
        min_penalty = expect_penalty * 8 / 10
        failure_message = (
            "With {}+{} services, {} blocks, penalty {} but expected {}..{}"
            .format(initial_services, added_services, len(hashes),
                    total_penalty, min_penalty, max_penalty))
        self.assertTrue(
            min_penalty <= total_penalty <= max_penalty, failure_message)
def test_KeepBasicRWTest(self):
    """Round-trip blocks through Keep and check permission enforcement.

    As the 'active' user: put() must return a signed locator and get()
    must return the stored content.  get() must raise NotFoundError for
    an unsigned locator, for a locator signed for a different user, and
    for either kind of locator once the client's API token is blanked.
    """
    not_found = arvados.errors.NotFoundError

    run_test_server.authorize_with('active')
    keep_client = arvados.KeepClient()

    signed_foo = keep_client.put('foo')
    self.assertRegexpMatches(
        signed_foo,
        r'^acbd18db4cc2f85cedef654fccc4a4d8\+3\+A[a-f0-9]+@[a-f0-9]+$',
        'invalid locator from Keep.put("foo"): ' + signed_foo)
    self.assertEqual(
        keep_client.get(signed_foo), 'foo',
        'wrong content from Keep.get(md5("foo"))')

    # Reading with an unsigned locator must fail.
    signed_bar = keep_client.put('bar')
    unsigned_bar = "37b51d194a7513e45b56f6524f2d51f2+3"
    self.assertRegexpMatches(
        signed_bar,
        r'^37b51d194a7513e45b56f6524f2d51f2\+3\+A[a-f0-9]+@[a-f0-9]+$',
        'invalid locator from Keep.put("bar"): ' + signed_bar)
    self.assertRaises(not_found, keep_client.get, unsigned_bar)

    # Reading as a different user must fail.
    run_test_server.authorize_with('spectator')
    self.assertRaises(not_found, arvados.Keep.get, signed_bar)

    # With the token blanked, neither the signed nor the unsigned
    # locator may be readable.
    keep_client.api_token = ''
    for locator in (signed_bar, unsigned_bar):
        self.assertRaises(not_found, keep_client.get, locator)
def setUpClass(cls):
    """Authorize as admin and attach shared API and Keep clients to cls.

    The Keep client is created with empty ``proxy`` and ``local_store``
    arguments.
    """
    super(KeepTestCase, cls).setUpClass()
    run_test_server.authorize_with("admin")
    api = arvados.api('v1')
    cls.api_client = api
    cls.keep_client = arvados.KeepClient(
        api_client=api, proxy='', local_store='')
def test_probe_order_reference_set(self):
    """Compare computed probe orders against a fixed reference set.

    Row i of the reference table is the expected probe order for
    hash=md5(sprintf("%064x",i)) with 16 mocked services, written as
    the hex digit that follows "keep0x" in each service root.
    """
    # E.g., the first probe for the block of 64 "0" characters is
    # expected to go to the service numbered 3, so the first row
    # starts with '3'.
    expected_order = [
        list(row) for row in (
            '3eab2d5fc9681074',
            '097dba52e648f1c3',
            'c5b4e023f8a7d691',
            '9d81c02e76a3bf54',
        )]
    hashes = [
        hashlib.md5("{:064x}".format(n)).hexdigest()
        for n in range(len(expected_order))]
    keep_client = arvados.KeepClient(
        api_client=self.mock_keep_services(count=16))
    service_id = re.compile(r'//\[?keep0x([0-9a-f]+)')
    for want_order, digest in zip(expected_order, hashes):
        roots = keep_client.weighted_service_roots(digest)
        got_order = [service_id.search(root).group(1) for root in roots]
        self.assertEqual(want_order, got_order)
def test_oddball_service_get(self):
    """get() returns the block when services have an unfamiliar type.

    Services are mocked with service_type 'fancynewblobstore' and the
    Keep response is mocked as a 200 carrying the block body.
    """
    body = 'oddball service get'
    api_client = self.mock_keep_services(service_type='fancynewblobstore')
    with tutil.mock_keep_responses(body, 200):
        keep_client = arvados.KeepClient(api_client=api_client)
        locator = tutil.str_keep_locator(body)
        actual = keep_client.get(locator)
    self.assertEqual(body, actual)
def check_no_services_error(self, verb, exc_class):
    """Assert that `verb` fails cleanly when services cannot be listed.

    The mocked API client raises ApiError when the accessible
    keep_services list is executed.  Calling the client method named
    by `verb` must raise `exc_class`, and the exception must report
    zero per-request errors.
    """
    api_client = mock.MagicMock(name='api_client')
    listing = api_client.keep_services().accessible()
    listing.execute.side_effect = arvados.errors.ApiError
    keep_client = arvados.KeepClient(api_client=api_client)
    with self.assertRaises(exc_class) as err_check:
        getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0')
    request_errors = err_check.exception.request_errors()
    self.assertEqual(0, len(request_errors))
def test_oddball_service_put(self):
    """put() returns the locator when services have an unfamiliar type.

    Services are mocked with service_type 'fancynewblobstore' and the
    Keep response is mocked as a 200 carrying the expected locator.
    """
    body = 'oddball service put'
    expected_locator = tutil.str_keep_locator(body)
    api_client = self.mock_keep_services(service_type='fancynewblobstore')
    with tutil.mock_keep_responses(expected_locator, 200):
        client = arvados.KeepClient(api_client=api_client)
        actual = client.put(body, copies=1)
    self.assertEqual(expected_locator, actual)
def test_put_error_does_not_include_successful_puts(self):
    """A failed put() reports only the requests that failed.

    Three services are mocked with responses 200, 500, 500.  put()
    must raise KeepWriteError, and the exception must list exactly
    two request errors.
    """
    data = 'partial failure test'
    digest = hashlib.md5(data).hexdigest()
    data_loc = '{}+{}'.format(digest, len(data))
    api_client = self.mock_keep_services(count=3)
    responses = tutil.mock_put_responses(data_loc, 200, 500, 500)
    expect_failure = self.assertRaises(arvados.errors.KeepWriteError)
    with responses as req_mock, expect_failure as exc_check:
        keep_client = arvados.KeepClient(api_client=api_client)
        keep_client.put(data)
    failed_requests = exc_check.exception.request_errors()
    self.assertEqual(2, len(failed_requests))
def test_put_timeout(self):
    """put() passes DEFAULT_TIMEOUT to the session's put call.

    A socket timeout is forced through the mocked session; put() must
    raise KeepWriteError, and the mocked put must have been called
    with ``timeout`` equal to KeepClient.DEFAULT_TIMEOUT.
    """
    api_client = self.mock_keep_services(count=1)
    timeout_error = socket.timeout("timed out")
    with tutil.mock_put([timeout_error]) as mock_session:
        keep_client = arvados.KeepClient(api_client=api_client)
        with self.assertRaises(arvados.errors.KeepWriteError):
            keep_client.put('foo')
        put_mock = mock_session.return_value.put
        self.assertTrue(put_mock.called)
        # call_args[1] holds the keyword arguments of the last call.
        self.assertEqual(
            arvados.KeepClient.DEFAULT_TIMEOUT,
            put_mock.call_args[1]['timeout'])
def test_put_timeout(self):
    """put() configures curl with the default timeouts.

    A socket timeout is forced through the mocked Keep responses;
    put() must raise KeepWriteError, and the first mocked response
    must carry CONNECTTIMEOUT_MS and TIMEOUT_MS options equal to the
    first and second DEFAULT_TIMEOUT entries times 1000.
    """
    api_client = self.mock_keep_services(count=1)
    timeout_error = socket.timeout("timed out")
    with tutil.mock_keep_responses(timeout_error, 0) as responder:
        keep_client = arvados.KeepClient(api_client=api_client)
        with self.assertRaises(arvados.errors.KeepWriteError):
            keep_client.put('foo')
        first_request = responder.responses[0]
        defaults = arvados.KeepClient.DEFAULT_TIMEOUT
        self.assertEqual(
            first_request.getopt(pycurl.CONNECTTIMEOUT_MS),
            int(defaults[0] * 1000))
        self.assertEqual(
            first_request.getopt(pycurl.TIMEOUT_MS),
            int(defaults[1] * 1000))
def test_oddball_service_writer_count(self):
    """A single response reporting enough replicas ends the put.

    Four 'fancynewblobstore' services are mocked; the responses carry
    an 'x-keep-replicas-stored' header of 3.  put() with copies=2 must
    return the locator, and the request mock must record one call.
    """
    body = 'oddball service writer count'
    expected_locator = tutil.str_keep_locator(body)
    api_client = self.mock_keep_services(
        service_type='fancynewblobstore', count=4)
    response_headers = {'x-keep-replicas-stored': 3}
    responses = tutil.mock_keep_responses(
        expected_locator, 200, 418, 418, 418, **response_headers)
    with responses as req_mock:
        client = arvados.KeepClient(api_client=api_client)
        actual = client.put(body, copies=2)
    self.assertEqual(expected_locator, actual)
    self.assertEqual(1, req_mock.call_count)
def test_KeepProxyTest1(self):
    """Write and read a block through a client that reports using a proxy.

    put('baz') must return a locator starting with the md5 of 'baz'
    and its size, get() must return 'baz', and the client's
    ``using_proxy`` attribute must be true.
    """
    # Per the original author's note, the client picks up the
    # ARVADOS_KEEP_PROXY environment variable set by setUpClass().
    keep_client = arvados.KeepClient(api_client=self.api_client,
                                     local_store='')
    baz_locator = keep_client.put('baz')
    # Raw string: '\+' is a regex escape, not a Python string escape.
    self.assertRegexpMatches(
        baz_locator,
        r'^73feffa4b7f6bb68e44cf984c85f6e88\+3',
        'wrong md5 hash from Keep.put("baz"): ' + baz_locator)
    self.assertEqual(keep_client.get(baz_locator),
                     'baz',
                     'wrong content from Keep.get(md5("baz"))')
    self.assertTrue(keep_client.using_proxy)
def check_errors_from_last_retry(self, verb, exc_class):
    """Assert that a failing `verb` reports the errors of the final try.

    Two services are mocked and the responses are 500, 500, 403, 403.
    The client method named by `verb` is called with num_retries=3 and
    must raise `exc_class`; the status codes of the reported request
    errors must be [403, 403].

    Args:
        verb: name of the KeepClient method to call; also selects the
            ``tutil.mock_<verb>_responses`` helper.
        exc_class: exception type the call is expected to raise.
    """
    api_client = self.mock_keep_services(count=2)
    req_mock = getattr(tutil, 'mock_{}_responses'.format(verb))(
        "retry error reporting test", 500, 500, 403, 403)
    with req_mock, tutil.skip_sleep, \
            self.assertRaises(exc_class) as err_check:
        keep_client = arvados.KeepClient(api_client=api_client)
        getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0',
                                   num_retries=3)
    # .values() works on both Python 2 and 3, unlike .itervalues().
    self.assertEqual([403, 403], [
        getattr(error, 'status_code', None)
        for error in err_check.exception.request_errors().values()])
def test_proxy_put_with_no_writable_services(self):
    """put() fails without sending requests when no service is writable.

    A single read-only proxy service is mocked.  put() must raise
    KeepWriteError whose message contains "no Keep services
    available", and the exception must report zero request errors.
    """
    data = 'test with no writable services'
    data_loc = tutil.str_keep_locator(data)
    api_client = self.mock_keep_services(
        service_type='proxy', read_only=True, count=1)
    with tutil.mock_keep_responses(data_loc, 200, 500, 500), \
            self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
        keep_client = arvados.KeepClient(api_client=api_client)
        keep_client.put(data)
    # assertIn shows the actual message on failure, unlike
    # assertEqual(True, ... in ...).
    self.assertIn("no Keep services available", str(exc_check.exception))
    self.assertEqual(0, len(exc_check.exception.request_errors()))
def test_KeepProxyTest2(self):
    """Write and read a block with ARVADOS_EXTERNAL_CLIENT set to 'true'.

    put('baz2') must return a locator starting with the md5 of 'baz2'
    and its size, get() must return 'baz2', and the client's
    ``using_proxy`` attribute must be true.
    """
    # Don't instantiate the proxy directly (proxy=''); instead mark
    # this client as external.  Per the original author's note, the
    # API server should then direct us to the proxy.
    arvados.config.settings()['ARVADOS_EXTERNAL_CLIENT'] = 'true'
    keep_client = arvados.KeepClient(api_client=self.api_client,
                                     proxy='', local_store='')
    baz_locator = keep_client.put('baz2')
    # Raw string: '\+' is a regex escape, not a Python string escape.
    self.assertRegexpMatches(
        baz_locator,
        r'^91f372a266fe2bf2823cb8ec7fda31ce\+4',
        'wrong md5 hash from Keep.put("baz2"): ' + baz_locator)
    self.assertEqual(keep_client.get(baz_locator),
                     'baz2',
                     'wrong content from Keep.get(md5("baz2"))')
    self.assertTrue(keep_client.using_proxy)
def test_proxy_get_timeout(self):
    """get() via a proxy service configures curl with the proxy timeouts.

    A socket timeout is forced through the mocked Keep responses;
    get() must raise KeepReadError, and the first mocked response must
    carry CONNECTTIMEOUT_MS, LOW_SPEED_TIME and LOW_SPEED_LIMIT options
    derived from the three DEFAULT_PROXY_TIMEOUT entries.
    """
    api_client = self.mock_keep_services(service_type='proxy', count=1)
    timeout_error = socket.timeout("timed out")
    with tutil.mock_keep_responses(timeout_error, 0) as responder:
        keep_client = arvados.KeepClient(api_client=api_client)
        with self.assertRaises(arvados.errors.KeepReadError):
            keep_client.get('ffffffffffffffffffffffffffffffff')
        first_request = responder.responses[0]
        proxy_defaults = arvados.KeepClient.DEFAULT_PROXY_TIMEOUT
        self.assertEqual(
            first_request.getopt(pycurl.CONNECTTIMEOUT_MS),
            int(proxy_defaults[0] * 1000))
        self.assertEqual(
            first_request.getopt(pycurl.LOW_SPEED_TIME),
            int(proxy_defaults[1]))
        self.assertEqual(
            first_request.getopt(pycurl.LOW_SPEED_LIMIT),
            int(proxy_defaults[2]))
def test_proxy_put_timeout(self):
    """put() via a proxy service passes DEFAULT_PROXY_TIMEOUT.

    A socket timeout is forced through the mocked session so that the
    keyword arguments of the put call can be inspected: ``timeout``
    must equal KeepClient.DEFAULT_PROXY_TIMEOUT rather than the
    ordinary default, and put() itself must raise KeepWriteError.
    """
    api_client = self.mock_keep_services(service_type='proxy', count=1)
    timeout_error = socket.timeout("timed out")
    with tutil.mock_put([timeout_error]) as mock_session:
        keep_client = arvados.KeepClient(api_client=api_client)
        with self.assertRaises(arvados.errors.KeepWriteError):
            keep_client.put('foo')
        put_mock = mock_session.return_value.put
        self.assertTrue(put_mock.called)
        # call_args[1] holds the keyword arguments of the last call.
        self.assertEqual(
            arvados.KeepClient.DEFAULT_PROXY_TIMEOUT,
            put_mock.call_args[1]['timeout'])
def mock_disks_and_gateways(self, disks=3, gateways=1):
    """Install a mocked service list containing gateway services.

    Sets ``self.gateways`` (one service record per gateway),
    ``self.gateway_roots`` (the https root URL of each gateway),
    ``self.api_client`` (mocked with `disks` services plus the
    gateways as additional services) and ``self.keepClient``.

    Args:
        disks: value passed as ``count`` to mock_keep_services.
        gateways: number of 'gateway:test' service records to create.
    """
    gateway_records = []
    for index in range(gateways):
        gateway_records.append({
            'uuid': 'zzzzz-bi6l4-gateway{:08d}'.format(index),
            'owner_uuid': 'zzzzz-tpzed-000000000000000',
            'service_host': 'gatewayhost{}'.format(index),
            'service_port': 12345,
            'service_ssl_flag': True,
            'service_type': 'gateway:test',
        })
    self.gateways = gateway_records

    root_template = "https://{service_host}:{service_port}/"
    self.gateway_roots = [
        root_template.format(**record) for record in self.gateways]

    self.api_client = self.mock_keep_services(
        count=disks, additional_services=self.gateways)
    self.keepClient = arvados.KeepClient(api_client=self.api_client)
def check_64_zeros_error_order(self, verb, exc_class):
    """Assert that request errors are reported in probe order.

    With 16 mocked services and every ``requests.<verb>`` call timing
    out, the client method named by `verb` must raise `exc_class`, and
    the URLs keyed in its request errors must name the services in the
    reference probe order for the block of 64 "0" characters.
    """
    block = '0' * 64
    # get takes a locator built from the block's md5; put takes the data.
    data = (hashlib.md5(block).hexdigest() + '+1234'
            if verb == 'get' else block)
    # Arbitrary port number:
    aport = random.randint(1024, 65535)
    api_client = self.mock_keep_services(service_port=aport, count=16)
    keep_client = arvados.KeepClient(api_client=api_client)
    always_timeout = mock.patch(
        'requests.' + verb, side_effect=socket.timeout)
    with always_timeout as req_mock, \
            self.assertRaises(exc_class) as err_check:
        getattr(keep_client, verb)(data)
    parsed_urls = [
        urlparse.urlparse(url)
        for url in err_check.exception.request_errors()]
    expected_endpoints = [
        ('keep0x' + digit, aport) for digit in '3eab2d5fc9681074']
    actual_endpoints = [
        (parsed.hostname, parsed.port) for parsed in parsed_urls]
    self.assertEqual(expected_endpoints, actual_endpoints)
def check_64_zeros_error_order(self, verb, exc_class):
    """Assert that request errors are reported in probe order.

    With ``self.services`` mocked services and ``pycurl.Curl`` patched
    so its return value has a socket.timeout side effect, the client
    method named by `verb` must raise `exc_class`, and the URLs keyed
    in its request errors must name the services in the reference
    probe order for the block of 64 "0" characters.
    """
    block = '0' * 64
    # get takes a locator for the block; put takes the data itself.
    data = tutil.str_keep_locator(block) if verb == 'get' else block
    # Arbitrary port number:
    aport = random.randint(1024, 65535)
    api_client = self.mock_keep_services(
        service_port=aport, count=self.services)
    keep_client = arvados.KeepClient(api_client=api_client)
    with mock.patch('pycurl.Curl') as curl_mock, \
            self.assertRaises(exc_class) as err_check:
        curl_mock.return_value.side_effect = socket.timeout
        getattr(keep_client, verb)(data)
    parsed_urls = [
        urlparse.urlparse(url)
        for url in err_check.exception.request_errors()]
    expected_endpoints = [
        ('keep0x' + digit, aport) for digit in '3eab2d5fc9681074']
    actual_endpoints = [
        (parsed.hostname, parsed.port) for parsed in parsed_urls]
    self.assertEqual(expected_endpoints, actual_endpoints)
def setUp(self):
    """Prepare the probe-order reference fixture.

    Sets ``self.services`` (16), ``self.expected_order`` (reference
    probe orders), ``self.blocks`` (one 64-hex-digit block per
    reference row), ``self.hashes`` (md5 of each block), and mocked
    ``self.api_client`` / ``self.keep_client`` for 16 services.
    """
    self.services = 16
    # expected_order[i] is the probe order for
    # hash=md5(sprintf("%064x",i)) with 16 services.  E.g., the first
    # probe for the block of 64 "0" characters is expected to go to
    # the service numbered 3, so expected_order[0][0]=='3'.
    self.expected_order = [
        list(row) for row in (
            '3eab2d5fc9681074',
            '097dba52e648f1c3',
            'c5b4e023f8a7d691',
            '9d81c02e76a3bf54',
        )]
    row_count = len(self.expected_order)
    self.blocks = ["{:064x}".format(n) for n in range(row_count)]
    self.hashes = [
        hashlib.md5(self.blocks[n]).hexdigest() for n in range(row_count)]
    self.api_client = self.mock_keep_services(count=self.services)
    self.keep_client = arvados.KeepClient(api_client=self.api_client)
def main(arguments=None):
    """Create a test collection made of randomly generated streams.

    One block of random printable characters is stored in Keep, a set
    of substreams referencing it is generated, and the streams are
    joined into a manifest until adding another would exceed
    ``max_manifest_size``.  The manifest is then saved as a new
    collection with a random name.

    Args:
        arguments: passed through to parse_arguments().

    Returns:
        0 once the collection has been created.

    Raises:
        Any exception raised while creating the collection is logged
        together with the start of the manifest, then re-raised.
    """
    args = parse_arguments(arguments)
    logger.info(
        "Creating test collection with (min={}, max={}) files per directory and a tree depth of (min={}, max={}) and (min={}, max={}) subdirs in each depth level..."
        .format(args.min_files, args.max_files, args.min_depth,
                args.max_depth, args.min_subdirs, args.max_subdirs))
    api = arvados.api('v1', timeout=5 * 60)
    max_filesize = 1024 * 1024
    data_block = ''.join(
        random.choice(string.printable) for _ in range(max_filesize))
    data_loc = arvados.KeepClient(api).put(data_block)
    streams = create_substreams(
        random.randint(args.min_depth, args.max_depth),
        '.', max_filesize, data_loc, args)

    # Collect the accepted streams and track the manifest length as we
    # go, rather than growing one large string with repeated +=.
    manifest_lines = []
    manifest_size = 0
    for stream in streams:
        if manifest_size + len(stream) > max_manifest_size:
            logger.info(
                "Skipping stream {} to avoid making a manifest bigger than 128MiB"
                .format(stream.split(' ')[0]))
            break
        manifest_lines.append(stream + '\n')
        manifest_size += len(stream) + 1
    manifest = ''.join(manifest_lines)

    # Bind the name before the try block: the error handler reads it,
    # so it must exist even when the failure happens early.
    coll_name = get_random_name(False)
    try:
        coll = api.collections().create(body={
            "collection": {
                "name": coll_name,
                "manifest_text": manifest
            },
        }).execute()
    except Exception:
        logger.info(
            "ERROR creating collection with name '{}' and manifest:\n'{}...'\nSize: {}"
            .format(coll_name, manifest[0:1024], len(manifest)))
        raise
    logger.info("Created collection {} - manifest size: {}".format(
        coll["uuid"], len(manifest)))
    return 0
def setUpClass(cls):
    """Authorize as 'active' and attach shared API and Keep clients to cls.

    The Keep client is created with ``local_store`` taken from
    ``cls.local_store``.
    """
    super(CollectionBenchmark, cls).setUpClass()
    run_test_server.authorize_with('active')
    api = arvados.api('v1')
    cls.api_client = api
    cls.keep_client = arvados.KeepClient(
        api_client=api, local_store=cls.local_store)
def localkeep(self):
    """Return the Keep client stored on ``self.local``, creating it once.

    When ``self.local`` has no 'keep' entry in its ``__dict__``, a
    KeepClient is built from ``self.localapi()`` and
    ``self.block_cache`` and stored there before being returned.
    """
    holder = self.local
    if 'keep' in holder.__dict__:
        return holder.keep
    holder.keep = arvados.KeepClient(
        api_client=self.localapi(), block_cache=self.block_cache)
    return holder.keep
def keep_client(self):
    """Return a KeepClient whose proxy is port 1 on tutil.TEST_HOST.

    The host is written in bracketed form in the proxy URL, and
    ``local_store`` is passed as an empty string.
    """
    proxy_url = 'http://[%s]:1' % (tutil.TEST_HOST, )
    return arvados.KeepClient(proxy=proxy_url, local_store='')
def setUp(self):
    """Create a mocked API client with two services and a Keep client."""
    mocked_api = self.mock_keep_services(count=2)
    self.api_client = mocked_api
    self.keep_client = arvados.KeepClient(api_client=mocked_api)
def new_client(self, **caller_kwargs):
    """Build a KeepClient from ``self.client_kwargs`` plus overrides.

    Keyword arguments given by the caller take precedence over the
    entries of ``self.client_kwargs``, which is left unmodified.
    """
    merged = self.client_kwargs.copy()
    for option, value in caller_kwargs.items():
        merged[option] = value
    return arvados.KeepClient(**merged)
def get_service_roots(self, *services):
    """Return the parsed, sorted service roots for the given services.

    `services` are forwarded to mock_keep_services(); the roots come
    from shuffled_service_roots('000000') and are sorted as strings
    before being parsed with urlparse.
    """
    mocked_api = self.mock_keep_services(*services)
    client = arvados.KeepClient(api_client=mocked_api)
    roots = sorted(client.shuffled_service_roots('000000'))
    return [urlparse.urlparse(root) for root in roots]
def setUp(self):
    """Run the parent setUp, then create this test's Keep client.

    The client uses ``self.api_client`` and is created with empty
    ``proxy`` and ``local_store`` arguments.
    """
    super(KeepOptionalPermission, self).setUp()
    client_options = {
        'api_client': self.api_client,
        'proxy': '',
        'local_store': '',
    }
    self.keep_client = arvados.KeepClient(**client_options)
def get_service_roots(self, api_client):
    """Return the parsed, sorted service roots seen through `api_client`.

    The roots come from weighted_service_roots('000000') and are
    sorted as strings before being parsed with urlparse.
    """
    client = arvados.KeepClient(api_client=api_client)
    roots = sorted(client.weighted_service_roots('000000'))
    return [urlparse.urlparse(root) for root in roots]