def _fetch_cipd_client(disk_cache, instance_id, fetch_url, timeoutfn):
    """Fetches cipd binary to |disk_cache|.

    Retries requests with exponential back-off: the delay before a retry starts
    at 1 second and doubles after each retry, for at most 5 attempts in total.

    Args:
      disk_cache: object whose write(instance_id, chunks) method receives the
          downloaded content as an iterator of chunks.
      instance_id: key under which the content is written to |disk_cache|.
      fetch_url: URL the client is downloaded from.
      timeoutfn: callable returning the timeout handed to net.url_open, or None.
          A non-None value smaller than the next back-off delay aborts the
          retries.

    Raises:
      Error if could not fetch content.
    """
    max_attempts = 5
    sleep_time = 1
    for attempt in range(max_attempts):
        if attempt > 0:
            # Query the timeout once so that the None check and the comparison
            # are made against the same value.
            remaining = timeoutfn()
            if remaining is not None and remaining < sleep_time:
                raise Error('Could not fetch CIPD client: timeout')
            logging.warning(
                'Will retry to fetch CIPD client in %ds', sleep_time)
            time.sleep(sleep_time)
            sleep_time *= 2

        try:
            res = net.url_open(fetch_url, timeout=timeoutfn())
            if res:
                # Stream the body to the cache in 64KiB chunks.
                disk_cache.write(instance_id, res.iter_content(64 * 1024))
                return
        except net.TimeoutError as ex:
            # Exceptions do not %-format their arguments (unlike logging
            # calls), so the message has to be built explicitly; otherwise the
            # error text keeps a literal '%s'.
            raise Error('Could not fetch CIPD client: %s' % ex)
        except net.NetError as ex:
            # Transient network failure: log it and move on to the next attempt.
            logging.warning(
                'Could not fetch CIPD client on attempt #%d: %s',
                attempt + 1, ex)

    raise Error('Could not fetch CIPD client after %d retries' % max_attempts)
def trigger_task(swarming_url, progress, unique, timeout, index):
    """Triggers a Swarming job and collects results.

    Returns:
      The total amount of time, in seconds, to run the task remotely including
      all the overhead; or one of the strings 'failed_trigger', 'no_test_keys'
      or 'no_result' when the corresponding step returned nothing.
    """
    task_name = 'load-test-%d-%s' % (index, unique)
    started = time.time()

    logging.info('trigger')
    manifest = swarming.Manifest(
        None, task_name, 1, None, swarming_load_test_bot.OS_NAME, '',
        'http://localhost:1', False, False, 100, None)
    payload = {'request': manifest.to_json()}
    reply = net.url_open(swarming_url + '/test', data=payload)
    if not reply:
        # The trigger request failed; report it as a failure tag.
        return 'failed_trigger'

    triggered = json.load(reply)
    # The key is removed from the reply so that the remainder can be compared
    # against a fixed expectation.
    task_key = triggered['test_keys'][0].pop('test_key')
    assert task_key
    expected_reply = {
        'test_case_name': task_name,
        'test_keys': [
            {
                'config_name': swarming_load_test_bot.OS_NAME,
                'num_instances': 1,
                'instance_index': 0,
            },
        ],
    }
    assert triggered == expected_reply, triggered

    progress.update_item('%5d' % index, processing=1)
    try:
        logging.info('collect')
        known_keys = swarming.get_test_keys(swarming_url, task_name)
        if not known_keys:
            return 'no_test_keys'
        assert known_keys == [task_key], known_keys

        results = []
        for _shard, output in swarming.yield_results(
                swarming_url, known_keys, timeout, None):
            results.append(output)
        if not results:
            return 'no_result'

        # Drop the per-machine fields before comparing.
        first = results[0]
        first.pop('machine_tag')
        first.pop('machine_id')
        expected_results = [
            {
                u'config_instance_index': 0,
                u'exit_codes': u'0',
                u'num_config_instances': 1,
                u'output': swarming_load_test_bot.TASK_OUTPUT,
            },
        ]
        assert results == expected_results, (
            '\n%s\n%s' % (results, expected_results))
        return time.time() - started
    finally:
        # Always mark the item as done, whatever the outcome.
        progress.update_item(
            '%5d - done' % index, processing=-1, processed=1)
def fetch(self, digest, _size, offset):
    """Yields the content stored for |digest| as a stream of chunks.

    The retrieve endpoint either returns the content inline (base64 encoded)
    or a URL to download it from. When downloading with a non-zero |offset|,
    the Content-Range header of the reply is validated.

    Args:
      digest: digest of the item to retrieve.
      _size: unused.
      offset: byte offset to start from; must be >= 0.

    Raises:
      IOError if the item cannot be retrieved or the reply is inconsistent with
      the request.
    """
    assert offset >= 0
    source_url = '%s/api/isolateservice/v1/retrieve' % (self._base_url)
    logging.debug('download_file(%s, %d)', source_url, offset)
    reply = self._do_fetch(source_url, digest, offset)
    if not reply:
        raise IOError(
            'Attempted to fetch from %s; no data exist: %s / %s.' %
            (source_url, self._namespace, digest))

    # Content stored in the DB comes back inline.
    inline = reply.get('content')
    if inline is not None:
        yield base64.b64decode(inline)
        return

    if not reply.get('url'):
        raise IOError(
            'Invalid response while fetching %s: %s' % (digest, reply))

    # Content stored in GS has to be downloaded from the returned URL.
    connection = net.url_open(reply['url'])
    if not connection:
        raise IOError(
            'Failed to download %s / %s' % (self._namespace, digest))

    # When an offset was requested, make sure the server honoured it by
    # inspecting Content-Range.
    if offset:
        content_range = connection.get_header('Content-Range')
        if not content_range:
            raise IOError('Missing Content-Range header')

        # The header looks like 'bytes <offset>-<last_byte_index>/<size>',
        # where <size> may be '*' when the total size is not known in advance.
        try:
            parsed = re.match(r'bytes (\d+)-(\d+)/(\d+|\*)', content_range)
            if not parsed:
                raise ValueError()
            first_txt, last_txt, total_txt = parsed.groups()
            content_offset = int(first_txt)
            last_byte_index = int(last_txt)
            size = None if total_txt == '*' else int(total_txt)
        except ValueError:
            raise IOError('Invalid Content-Range header: %s' % content_range)

        # The returned range must start where we asked.
        if offset != content_offset:
            raise IOError(
                'Expecting offset %d, got %d (Content-Range is %s)' %
                (offset, content_offset, content_range))

        # The returned range must run to the end of the file.
        if size is not None and last_byte_index + 1 != size:
            raise IOError(
                'Incomplete response. Content-Range: %s' % content_range)

    for chunk in connection.iter_content(NET_IO_FILE_CHUNK):
        yield chunk
def _do_push(self, push_state, content):
    """Uploads isolated file to the URL.

    Used only for storing files, not for API calls. Can be overridden in
    subclasses.

    Args:
      push_state: an _IsolateServicePushState instance.
      content: an iterable that yields 'str' chunks.

    Returns:
      For a DB upload, a false value when there is no response, else the 'ok'
      field of the JSON response. For a GS upload, False when the request fails
      or reading the response times out, else whether the MD5 of the uploaded
      data appears in the 'x-goog-hash' response header.
    """
    # A cheap way to avoid a memcpy of a (possibly huge) file, until streaming
    # upload support is implemented.
    if isinstance(content, list) and len(content) == 1:
        payload = content[0]
    else:
        payload = b''.join(content)

    if push_state.finalize_url:
        # Upload to GS.
        response = net.url_open(
            content_type='application/octet-stream',
            data=payload,
            method='PUT',
            headers={'Cache-Control': 'public, max-age=31536000'},
            url=push_state.upload_url)
        if not response:
            return False
        try:
            response.read()
        except net.TimeoutError:
            return False

        # Integrity check of the uploaded file.
        # https://cloud.google.com/storage/docs/xml-api/reference-headers#xgooghash
        goog_hash = response.headers.get('x-goog-hash')
        assert goog_hash, response.headers
        digest_b64 = base64.b64encode(hashlib.md5(payload).digest())
        md5_x_goog_hash = 'md5=' + six.ensure_str(digest_b64)
        return md5_x_goog_hash in goog_hash

    # DB upload: the content travels base64 encoded inside a JSON request.
    url = '%s/%s' % (self.server_ref.url, push_state.upload_url)
    encoded = base64.b64encode(payload)
    request = {
        'upload_ticket': push_state.preupload_status['upload_ticket'],
        'content': six.ensure_str(encoded),
    }
    response = net.url_read_json(url=url, data=request)
    return response is not None and response.get('ok')
def trigger_task(swarming_url, dimensions, progress, unique, timeout, index):
    """Triggers a Swarming job and collects results.

    Returns:
      The total amount of time, in seconds, to run the task remotely including
      all the overhead; or one of the strings 'failed_trigger', 'no_test_keys'
      or 'no_result' when the corresponding step returned nothing.
    """
    task_name = 'load-test-%d-%s' % (index, unique)
    started = time.time()

    logging.info('trigger')
    manifest = swarming.Manifest(
        isolate_server='http://localhost:1',
        namespace='dummy-isolate',
        isolated_hash=1,
        task_name=task_name,
        shards=1,
        env={},
        dimensions=dimensions,
        working_dir=None,
        deadline=3600,
        verbose=False,
        profile=False,
        priority=100)
    # TODO(maruel): Make output size configurable.
    # TODO(maruel): Make number of shards configurable.
    output_size = 100
    script = 'print(\'1\'*%s)' % output_size
    manifest.add_task('echo stuff', ['python', '-c', script])
    payload = {'request': manifest.to_json()}
    reply = net.url_open(swarming_url + '/test', data=payload)
    if not reply:
        # The trigger request failed; report it as a failure tag.
        return 'failed_trigger'

    triggered = json.load(reply)
    # The key is removed from the reply so that the remainder can be compared
    # against a fixed expectation.
    task_key = triggered['test_keys'][0].pop('test_key')
    assert task_key
    expected_reply = {
        'test_case_name': task_name,
        'test_keys': [
            {
                # The old API uses a hardcoded config name.
                'config_name': 'isolated',
                'num_instances': 1,
                'instance_index': 0,
            },
        ],
    }
    if triggered != expected_reply:
        # The new API has no concept of config name, so it uses the task name.
        expected_reply['test_keys'][0]['config_name'] = task_name
    assert triggered == expected_reply, (
        '%s\n%s' % (triggered, expected_reply))

    progress.update_item('%5d' % index, processing=1)
    try:
        logging.info('collect')
        known_keys = swarming.get_task_keys(swarming_url, task_name)
        if not known_keys:
            return 'no_test_keys'
        assert known_keys == [task_key], known_keys

        results = []
        for _shard, output in swarming.yield_results(
                swarming_url, known_keys, timeout, None, False, None):
            results.append(output)
        if not results:
            return 'no_result'

        # Drop the per-machine fields before comparing.
        first = results[0]
        first.pop('machine_tag')
        first.pop('machine_id')
        expected_results = [
            {
                u'config_instance_index': 0,
                u'exit_codes': u'0',
                u'num_config_instances': 1,
                u'output': swarming_load_test_bot.TASK_OUTPUT,
            },
        ]
        assert results == expected_results, (
            '\n%s\n%s' % (results, expected_results))
        return time.time() - started
    finally:
        # Always mark the item as done, whatever the outcome.
        progress.update_item(
            '%5d - done' % index, processing=-1, processed=1)
def call(self, mode, sleep_duration, **kwargs):
    """Opens '<server url>/<mode>/<sleep_duration>' through net.url_open.

    |sleep_duration| is formatted with '%f'. Extra keyword arguments are
    forwarded to net.url_open, except that max_attempts is always forced to 2.

    Returns:
      Whatever net.url_open returns.
    """
    base = self.server.url
    suffix = '/%s/%f' % (mode, sleep_duration)
    options = dict(kwargs, max_attempts=2)
    return net.url_open(base + suffix, **options)
def _run(self):
    """Polls the server and fakes the execution of the tasks it hands out.

    Loops until the kill event is set or an unexpected reply is received. Each
    outcome is reported through self._events as either a string tag or, for a
    completed task, the elapsed time in seconds. The bot always tries to
    unregister itself from the server on the way out.
    """
    try:
        self._progress.update_item('%d alive' % self._index, bots=1)
        while not self._kill_event.is_set():
            payload = {'attributes': json.dumps(self._attributes)}
            request = net.url_open(
                self._swarming + '/poll_for_test', data=payload)
            if request is None:
                self._events.put('poll_for_test_empty')
                continue

            started = time.time()
            try:
                manifest = json.load(request)
            except ValueError:
                self._progress.update_item('Failed to poll')
                self._events.put('poll_for_test_invalid')
                continue

            commands = [c['function'] for c in manifest.get('commands', [])]
            if not commands:
                # Nothing to run; come back when the server says so.
                self._events.put('sleep')
                time.sleep(manifest['come_back'])
                continue

            if commands == ['UpdateSlave']:
                # Calculate the proper SHA-1 and loop again. This could happen
                # if the Swarming server is upgraded while this script runs.
                self._attributes['version'] = calculate_version(
                    manifest['commands'][0]['args'])
                self._events.put('update_slave')
                continue

            if commands != ['RunManifest']:
                self._progress.update_item(
                    'Unexpected RPC call %s\n%s' % (commands, manifest))
                self._events.put('unknown_rpc')
                break

            run_cmd = manifest['commands'][0]
            if not isinstance(run_cmd['args'], unicode):
                self._progress.update_item(
                    'Unexpected RPC manifest\n%s' % manifest)
                self._events.put('unknown_args')
                break

            result_url = manifest['result_url']
            test_run = json.loads(run_cmd['args'])
            if result_url != test_run['result_url']:
                self._progress.update_item(
                    'Unexpected result url: %s != %s' %
                    (result_url, test_run['result_url']))
                self._events.put('invalid_result_url')
                break

            ping_url = test_run['ping_url']
            ping_delay = test_run['ping_delay']
            self._progress.update_item(
                '%d processing' % self._index, processing=1)

            # Fake activity and send pings as requested.
            # NOTE(review): the sleep below covers the whole remaining
            # duration, so at most one ping is sent per task - confirm that
            # this is intended.
            while True:
                time_left = max(0, (started + self._duration) - time.time())
                if time_left > ping_delay:
                    # Include empty data to ensure the request is a POST.
                    pong = net.url_read(ping_url, data={})
                    assert pong == 'Success.', pong
                    time_left = max(
                        0, (started + self._duration) - time.time())
                if not time_left:
                    break
                time.sleep(time_left)

            # In the old API, r=<task_id>&id=<bot_id> is passed as the url.
            report = {
                'o': TASK_OUTPUT,
                'x': '0',
            }
            outcome = net.url_read(manifest['result_url'], data=report)
            self._progress.update_item(
                '%d processed' % self._index, processing=-1, processed=1)
            if not outcome:
                self._events.put('result_url_fail')
            else:
                assert outcome == 'Successfully update the runner results.', (
                    outcome)
                self._events.put(time.time() - started)
    finally:
        try:
            # Unregister itself. Otherwise the server will have tons of fake
            # slaves that the admin will have to remove manually.
            response = net.url_open(
                self._swarming + '/delete_machine_stats',
                data=[('r', self._bot_id)])
            if response:
                response.read()
            else:
                self._events.put('failed_unregister')
        finally:
            self._progress.update_item('%d quit' % self._index, bots=-1)
def _run(self):
    """Polls the server and fakes the execution of the tasks it hands out.

    Loops until the kill event is set or an unexpected reply is received. Each
    outcome is reported through self._events as either a string tag or, for a
    completed task, the elapsed time in seconds. The bot always tries to
    unregister itself from the server on the way out.
    """
    try:
        self._progress.update_item('%d alive' % self._index, bots=1)
        while not self._kill_event.is_set():
            payload = {'attributes': json.dumps(self._attributes)}
            request = net.url_open(
                self._swarming + '/poll_for_test', data=payload)
            if request is None:
                self._events.put('poll_for_test_empty')
                continue

            started = time.time()
            try:
                manifest = json.load(request)
            except ValueError:
                self._progress.update_item('Failed to poll')
                self._events.put('poll_for_test_invalid')
                continue

            commands = [c['function'] for c in manifest.get('commands', [])]
            if not commands:
                # Nothing to run; come back when the server says so.
                self._events.put('sleep')
                time.sleep(manifest['come_back'])
                continue

            if commands == ['UpdateSlave']:
                # Calculate the proper SHA-1 and loop again. This could happen
                # if the Swarming server is upgraded while this script runs.
                self._attributes['version'] = calculate_version(
                    manifest['commands'][0]['args'])
                self._events.put('update_slave')
                continue

            if commands != ['StoreFiles', 'RunCommands']:
                self._progress.update_item(
                    'Unexpected RPC call %s\n%s' % (commands, manifest))
                self._events.put('unknown_rpc')
                break

            # The normal way Swarming works is that it 'stores' a
            # test_run.swarm file and then defers control to
            # swarm_bot/local_test_runner.py.
            store_cmd = manifest['commands'][0]
            assert len(store_cmd['args']) == 1, store_cmd['args']
            filepath, filename, test_run_content = store_cmd['args'][0]
            assert filepath == ''
            assert filename == 'test_run.swarm'
            run_cmd = manifest['commands'][1]
            assert 'local_test_runner.py' in run_cmd['args'][0], (
                manifest['commands'][1])
            result_url = manifest['result_url']
            test_run = json.loads(test_run_content)
            assert result_url == test_run['result_url']

            ping_url = test_run['ping_url']
            ping_delay = test_run['ping_delay']
            self._progress.update_item(
                '%d processing' % self._index, processing=1)

            # Fake activity and send pings as requested.
            # NOTE(review): the sleep below covers the whole remaining
            # duration, so at most one ping is sent per task - confirm that
            # this is intended.
            while True:
                time_left = max(0, (started + self._duration) - time.time())
                if time_left > ping_delay:
                    # Include empty data to ensure the request is a POST.
                    pong = net.url_read(ping_url, data={})
                    assert pong == 'Success.', pong
                    time_left = max(
                        0, (started + self._duration) - time.time())
                if not time_left:
                    break
                time.sleep(time_left)

            report = {
                'c': test_run['configuration']['config_name'],
                'n': test_run['test_run_name'],
                'o': False,
                'result_output': TASK_OUTPUT,
                's': True,
                'x': '0',
            }
            outcome = net.url_read(manifest['result_url'], data=report)
            self._progress.update_item(
                '%d processed' % self._index, processing=-1, processed=1)
            if not outcome:
                self._events.put('result_url_fail')
            else:
                assert outcome == 'Successfully update the runner results.', (
                    outcome)
                self._events.put(time.time() - started)
    finally:
        try:
            # Unregister itself. Otherwise the server will have tons of fake
            # slaves that the admin will have to remove manually.
            response = net.url_open(
                self._swarming + '/delete_machine_stats',
                data=[('r', self._machine_id)])
            if response:
                response.read()
            else:
                self._events.put('failed_unregister')
        finally:
            self._progress.update_item('%d quit' % self._index, bots=-1)
def trigger_task(swarming_url, dimensions, sleep_time, output_size, progress,
                 unique, timeout, index):
    """Triggers a Swarming job and collects results.

    The task prints |output_size| characters, sleeps |sleep_time| seconds and
    prints a final line.

    Returns:
      The total amount of time, in seconds, to run the task remotely including
      all the overhead; or one of the strings 'failed_trigger', 'no_test_keys'
      or 'no_result' when the corresponding step returned nothing.
    """
    task_name = 'load-test-%d-%s' % (index, unique)
    started = time.time()

    logging.info('trigger')
    manifest = swarming.Manifest(
        isolate_server='http://localhost:1',
        namespace='dummy-isolate',
        isolated_hash=1,
        task_name=task_name,
        extra_args=[],
        env={},
        dimensions=dimensions,
        deadline=int(timeout - TIMEOUT_OVERHEAD),
        verbose=False,
        profile=False,
        priority=100)
    script = (
        "import time; print('1'*%s); time.sleep(%d); print('Back')" %
        (output_size, sleep_time))
    manifest.add_task('echo stuff', ['python', '-c', script])
    payload = {'request': manifest.to_json()}
    reply = net.url_open(swarming_url + '/test', data=payload)
    if not reply:
        # The trigger request failed; report it as a failure tag.
        return 'failed_trigger'

    triggered = json.load(reply)
    # The old API uses a hardcoded config name. The new API has no concept of
    # config name so it uses the task name. Ignore this detail.
    task_keys = []
    for entry in triggered['test_keys']:
        entry.pop('config_name')
        task_key = entry.pop('test_key')
        task_keys.append(task_key)
        assert re.match('[0-9a-f]+', task_key), task_keys
    expected_reply = {
        u'priority': 100,
        u'test_case_name': unicode(task_name),
        u'test_keys': [
            {
                u'num_instances': 1,
                u'instance_index': 0,
            },
        ],
    }
    assert triggered == expected_reply, (
        '\n%s\n%s' % (triggered, expected_reply))

    progress.update_item('%5d' % index, processing=1)
    try:
        logging.info('collect')
        fetched_keys = swarming.get_task_keys(swarming_url, task_name)
        if not fetched_keys:
            return 'no_test_keys'
        assert task_keys == fetched_keys, (task_keys, fetched_keys)

        results = []
        for _shard, output in swarming.yield_results(
                swarming_url, task_keys, timeout, None, False, None):
            results.append(output)
        if not results:
            return 'no_result'

        for entry in results:
            entry.pop('machine_tag')
            entry.pop('machine_id')
            # TODO(maruel): Assert output even when run on a real bot, i.e.
            # compare it against swarming_load_test_bot.TASK_OUTPUT.
            entry.pop('output')
        expected_results = [
            {
                u'config_instance_index': 0,
                u'exit_codes': u'0',
                u'num_config_instances': 1,
            },
        ]
        assert results == expected_results, (
            '\n%s\n%s' % (results, expected_results))
        return time.time() - started
    finally:
        # Always mark the item as done, whatever the outcome.
        progress.update_item(
            '%5d - done' % index, processing=-1, processed=1)
def trigger_task(
        swarming_url, dimensions, sleep_time, output_size, progress, unique,
        timeout, index):
    """Triggers a Swarming job and collects results.

    The task prints |output_size| characters, sleeps |sleep_time| seconds and
    prints a final line.

    Returns:
      The total amount of time, in seconds, to run the task remotely including
      all the overhead; or one of the strings 'failed_trigger', 'no_test_keys'
      or 'no_result' when the corresponding step returned nothing.
    """
    task_name = 'load-test-%d-%s' % (index, unique)
    started = time.time()

    logging.info('trigger')
    manifest = swarming.Manifest(
        isolate_server='http://localhost:1',
        namespace='dummy-isolate',
        isolated_hash=1,
        task_name=task_name,
        extra_args=[],
        env={},
        dimensions=dimensions,
        working_dir=None,
        deadline=3600,
        verbose=False,
        profile=False,
        priority=100)
    script = (
        'import time; print(\'1\'*%s); time.sleep(%d); print(\'Back\')' %
        (output_size, sleep_time))
    manifest.add_task('echo stuff', ['python', '-c', script])
    payload = {'request': manifest.to_json()}
    reply = net.url_open(swarming_url + '/test', data=payload)
    if not reply:
        # The trigger request failed; report it as a failure tag.
        return 'failed_trigger'

    triggered = json.load(reply)
    # The old API uses a hardcoded config name. The new API has no concept of
    # config name so it uses the task name. Ignore this detail.
    task_keys = []
    for entry in triggered['test_keys']:
        entry.pop('config_name')
        task_key = entry.pop('test_key')
        task_keys.append(task_key)
        assert re.match('[0-9a-f]+', task_key), task_keys
    expected_reply = {
        u'test_case_name': unicode(task_name),
        u'test_keys': [
            {
                u'num_instances': 1,
                u'instance_index': 0,
            },
        ],
    }
    assert triggered == expected_reply, (
        '\n%s\n%s' % (triggered, expected_reply))

    progress.update_item('%5d' % index, processing=1)
    try:
        logging.info('collect')
        fetched_keys = swarming.get_task_keys(swarming_url, task_name)
        if not fetched_keys:
            return 'no_test_keys'
        assert task_keys == fetched_keys, (task_keys, fetched_keys)

        results = []
        for _shard, output in swarming.yield_results(
                swarming_url, task_keys, timeout, None, False, None):
            results.append(output)
        if not results:
            return 'no_result'

        for entry in results:
            entry.pop('machine_tag')
            entry.pop('machine_id')
            # TODO(maruel): Assert output even when run on a real bot, i.e.
            # compare it against swarming_load_test_bot.TASK_OUTPUT.
            entry.pop('output')
        expected_results = [
            {
                u'config_instance_index': 0,
                u'exit_codes': u'0',
                u'num_config_instances': 1,
            },
        ]
        assert results == expected_results, (
            '\n%s\n%s' % (results, expected_results))
        return time.time() - started
    finally:
        # Always mark the item as done, whatever the outcome.
        progress.update_item(
            '%5d - done' % index, processing=-1, processed=1)
def trigger_task(swarming_url, dimensions, progress, unique, timeout, index):
    """Triggers a Swarming job and collects results.

    Returns:
      The total amount of time, in seconds, to run the task remotely including
      all the overhead; or one of the strings 'failed_trigger', 'no_test_keys'
      or 'no_result' when the corresponding step returned nothing.
    """
    task_name = 'load-test-%d-%s' % (index, unique)
    started = time.time()

    logging.info('trigger')
    manifest = swarming.Manifest(
        isolate_server='http://localhost:1',
        namespace='dummy-isolate',
        isolated_hash=1,
        task_name=task_name,
        shards=1,
        env={},
        dimensions=dimensions,
        working_dir=None,
        deadline=3600,
        verbose=False,
        profile=False,
        priority=100)
    # TODO(maruel): Make output size configurable.
    # TODO(maruel): Make number of shards configurable.
    output_size = 100
    script = 'print(\'1\'*%s)' % output_size
    manifest.add_task('echo stuff', ['python', '-c', script])
    payload = {'request': manifest.to_json()}
    reply = net.url_open(swarming_url + '/test', data=payload)
    if not reply:
        # The trigger request failed; report it as a failure tag.
        return 'failed_trigger'

    triggered = json.load(reply)
    # The key is removed from the reply so that the remainder can be compared
    # against a fixed expectation.
    task_key = triggered['test_keys'][0].pop('test_key')
    assert task_key
    expected_reply = {
        'test_case_name': task_name,
        'test_keys': [
            {
                # The old API uses a hardcoded config name.
                'config_name': 'isolated',
                'num_instances': 1,
                'instance_index': 0,
            },
        ],
    }
    if triggered != expected_reply:
        # The new API has no concept of config name, so it uses the task name.
        expected_reply['test_keys'][0]['config_name'] = task_name
    assert triggered == expected_reply, (
        '%s\n%s' % (triggered, expected_reply))

    progress.update_item('%5d' % index, processing=1)
    try:
        logging.info('collect')
        known_keys = swarming.get_task_keys(swarming_url, task_name)
        if not known_keys:
            return 'no_test_keys'
        assert known_keys == [task_key], known_keys

        results = []
        for _shard, output in swarming.yield_results(
                swarming_url, known_keys, timeout, None, False, None):
            results.append(output)
        if not results:
            return 'no_result'

        # Drop the per-machine fields before comparing.
        first = results[0]
        first.pop('machine_tag')
        first.pop('machine_id')
        expected_results = [
            {
                u'config_instance_index': 0,
                u'exit_codes': u'0',
                u'num_config_instances': 1,
                u'output': swarming_load_test_bot.TASK_OUTPUT,
            },
        ]
        assert results == expected_results, (
            '\n%s\n%s' % (results, expected_results))
        return time.time() - started
    finally:
        # Always mark the item as done, whatever the outcome.
        progress.update_item(
            '%5d - done' % index, processing=-1, processed=1)
def _run(self):
    """Polls the server and fakes the execution of the tasks it hands out.

    Loops until the kill event is set or an unexpected reply is received. Each
    outcome is reported through self._events as either a string tag or, for a
    completed task, the elapsed time in seconds. The bot always tries to
    unregister itself from the server on the way out.
    """
    try:
        self._progress.update_item('%d alive' % self._index, bots=1)
        while not self._kill_event.is_set():
            payload = {'attributes': json.dumps(self._attributes)}
            request = net.url_open(
                self._swarming + '/poll_for_test', data=payload)
            if request is None:
                self._events.put('poll_for_test_empty')
                continue

            started = time.time()
            try:
                manifest = json.load(request)
            except ValueError:
                self._progress.update_item('Failed to poll')
                self._events.put('poll_for_test_invalid')
                continue

            commands = [c['function'] for c in manifest.get('commands', [])]
            if not commands:
                # Nothing to run; come back when the server says so.
                self._events.put('sleep')
                time.sleep(manifest['come_back'])
                continue

            if commands == ['UpdateSlave']:
                # Calculate the proper SHA-1 and loop again. This could happen
                # if the Swarming server is upgraded while this script runs.
                self._attributes['version'] = calculate_version(
                    manifest['commands'][0]['args'])
                self._events.put('update_slave')
                continue

            if commands != ['RunManifest']:
                self._progress.update_item(
                    'Unexpected RPC call %s\n%s' % (commands, manifest))
                self._events.put('unknown_rpc')
                break

            run_cmd = manifest['commands'][0]
            if not isinstance(run_cmd['args'], unicode):
                self._progress.update_item(
                    'Unexpected RPC manifest\n%s' % manifest)
                self._events.put('unknown_args')
                break

            result_url = manifest['result_url']
            test_run = json.loads(run_cmd['args'])
            if result_url != test_run['result_url']:
                self._progress.update_item(
                    'Unexpected result url: %s != %s' %
                    (result_url, test_run['result_url']))
                self._events.put('invalid_result_url')
                break

            ping_url = test_run['ping_url']
            ping_delay = test_run['ping_delay']
            self._progress.update_item(
                '%d processing' % self._index, processing=1)

            # Fake activity and send pings as requested.
            # NOTE(review): the sleep below covers the whole remaining
            # duration, so at most one ping is sent per task - confirm that
            # this is intended.
            while True:
                time_left = max(0, (started + self._duration) - time.time())
                if time_left > ping_delay:
                    # Include empty data to ensure the request is a POST.
                    pong = net.url_read(ping_url, data={})
                    assert pong == 'Success.', pong
                    time_left = max(
                        0, (started + self._duration) - time.time())
                if not time_left:
                    break
                time.sleep(time_left)

            # In the old API, r=<task_id>&id=<bot_id> is passed as the url.
            report = {
                'o': TASK_OUTPUT,
                'x': '0',
            }
            outcome = net.url_read(manifest['result_url'], data=report)
            self._progress.update_item(
                '%d processed' % self._index, processing=-1, processed=1)
            if not outcome:
                self._events.put('result_url_fail')
            else:
                assert outcome == 'Successfully update the runner results.', (
                    outcome)
                self._events.put(time.time() - started)
    finally:
        try:
            # Unregister itself. Otherwise the server will have tons of fake
            # slaves that the admin will have to remove manually.
            response = net.url_open(
                self._swarming + '/delete_machine_stats',
                data=[('r', self._bot_id)])
            if response:
                response.read()
            else:
                self._events.put('failed_unregister')
        finally:
            self._progress.update_item('%d quit' % self._index, bots=-1)