def _start_execution(self, stateMachineArn, name, input):
    def context():
        return MockLambdaContext(StorageVisitation.shutdown_time + self.timeout)

    Config.set_config(BucketConfig.NORMAL)
    input = json.loads(input)
    state = implementation.job_initialize(input, context())
    walker_state = copy.deepcopy(state)
    state['work_result'] = []
    while walker_state['_status'] == 'init':
        walker_state = implementation.walker_initialize(walker_state, context(), 0)
    while walker_state['_status'] == 'walk':
        walker_state = implementation.walker_walk(walker_state, context(), 0)
    walker_state = implementation.walker_finalize(walker_state, context(), 0)
    self.assertEquals(walker_state['_status'], 'end')
    state['work_result'].append(walker_state['work_result'])
    state = implementation.job_finalize(state, context())
    work_result = state['work_result']
    for replica, missing, present in zip(state['replicas'], work_result['missing'], work_result['present']):
        keys = self.keys[Replica[replica]]
        self.assertEquals(self.num_keys, missing + present)
        self.assertEquals(len(keys), present)
    # Return the total number of missing keys (which differs between test runs because the overall number
    # of keys is random). The test can assert this value, proving that this code was actually run.
    return {'executionArn': str(sum(work_result['missing']))}
def touch_test_file(replica: Replica, dst_bucket: str) -> bool:
    """
    Write a test file into the specified bucket.
    :param dst_bucket: the bucket to be checked.
    :param replica: the replica to execute the checkout in.
    :return: True if able to write, otherwise raise DestinationBucketNotWritableError.
    """
    randomizer = ''.join(choices(hexdigits, k=2))
    # Spreading the touch test file across a larger key range prevents hitting object modification rate limits.
    test_object = f"touch/{randomizer}.txt"
    handle = Config.get_blobstore_handle(replica)
    try:
        handle.upload_file_handle(dst_bucket, test_object, io.BytesIO(b""))
        return True
    except Exception as ex:
        raise DestinationBucketNotWritableError(ex)
    finally:
        try:
            Config.get_blobstore_handle(replica).delete(dst_bucket, test_object)
        except Exception:
            pass
def validate_file_dst(dst_bucket: str, dst_key: str, replica: Replica):
    try:
        Config.get_blobstore_handle(replica).get_user_metadata(dst_bucket, dst_key)
        return True
    except (BlobNotFoundError, BlobStoreUnknownError):
        return False
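# A minimal usage sketch (an assumption, not part of the original module): combining the destination
# checks before starting a checkout. The wrapper name and the way the results are combined here are
# illustrative only; validate_dst_bucket is the bucket-level check defined further below.
def _example_validate_destination(dst_bucket: str, dst_key: str, replica: Replica):
    code, cause = validate_dst_bucket(dst_bucket, replica)
    if code != ValidationEnum.PASSED:
        return code, cause
    if validate_file_dst(dst_bucket, dst_key, replica):
        # the destination key already exists; callers may treat this as a conflict or as already done
        return ValidationEnum.PASSED, f"{dst_key} already exists in {dst_bucket}"
    return ValidationEnum.PASSED, None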
def test_s3_checkout_bucket(self):
    Config.set_config(BucketConfig.NORMAL)
    self.assertEquals(Config.get_s3_checkout_bucket(), os.environ["DSS_S3_CHECKOUT_BUCKET"])
    Config.set_config(BucketConfig.TEST)
    self.assertEquals(Config.get_s3_checkout_bucket(), os.environ["DSS_S3_CHECKOUT_BUCKET_TEST"])
    Config.set_config(BucketConfig.TEST_FIXTURE)
    self.assertEquals(Config.get_s3_checkout_bucket(), os.environ["DSS_S3_CHECKOUT_BUCKET_TEST"])
def test_s3_events_bucket(self):
    Config.set_config(BucketConfig.NORMAL)
    self.assertEqual(Config.get_flashflood_bucket(), os.environ["DSS_FLASHFLOOD_BUCKET"])
    Config.set_config(BucketConfig.TEST)
    self.assertEqual(Config.get_flashflood_bucket(), os.environ["DSS_S3_BUCKET_TEST"])
    Config.set_config(BucketConfig.TEST_FIXTURE)
    self.assertEqual(Config.get_flashflood_bucket(), os.environ["DSS_S3_BUCKET_TEST"])
def touch_test_file(dst_bucket: str, replica: Replica) -> bool:
    """
    Write a test file into the specified bucket.
    :param dst_bucket: the bucket to be checked.
    :param replica: the replica to execute the write in.
    :return: True if able to write, False otherwise.
    """
    test_object = "touch.txt"
    handle = Config.get_blobstore_handle(replica)
    try:
        handle.upload_file_handle(dst_bucket, test_object, io.BytesIO(b""))
        Config.get_blobstore_handle(replica).delete(dst_bucket, test_object)
        return True
    except Exception:
        return False
def delete_event_for_bundle(replica: Replica,
                            key: str,
                            flashflood_prefixes: typing.Tuple[str, ...] = None):
    """
    Delete a bundle event from flashflood. This operation is eventually consistent, and will not take
    effect until flashflood.update() is called (typically by daemons/dss-event-scribe).
    """
    fqid = key.split("/", 1)[1]
    if flashflood_prefixes is None:
        flashflood_prefixes = replica.flashflood_prefix_write
    for pfx in flashflood_prefixes:
        ff = Config.get_flashflood_handle(pfx)
        try:
            ff.delete_event(fqid)
            logger.info(json.dumps(dict(message="Deleted event",
                                        replica=replica.name,
                                        prefix=pfx,
                                        key=key), indent=4))
        except FlashFloodEventNotFound:
            logger.warning(json.dumps(dict(message="Cannot delete nonexistent event",
                                           replica=replica.name,
                                           prefix=pfx,
                                           key=key), indent=4))
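# A minimal usage sketch (an assumption, not part of the original module): deleting the event for one
# bundle key across the replica's default write prefixes. The key shown is illustrative; the deletion is
# not visible to readers until flashflood updates are applied (see update_flashflood further below).
def _example_delete_bundle_event(key: str) -> None:
    # passing flashflood_prefixes=None falls back to replica.flashflood_prefix_write
    delete_event_for_bundle(Replica.aws, key)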
def record(argv: typing.List[str], args: argparse.Namespace):
    """
    Record events for `keys` into flashflood prefix `prefix`.
    If `keys` is omitted, record an event for each bundle in `replica` via lambda forwarding.
    """
    replica = Replica[args.replica]
    job_id = args.job_id or f"{uuid4()}"
    cmd_template = (f"events record --job-id {job_id} "
                    f"--prefix {args.prefix} "
                    f"--replica {replica.name} "
                    f"--keys {{keys}}")

    if args.keys is None:
        start_time = datetime.now()

        def forward_keys(bundle_fqids):
            with SQSMessenger(command_queue_url) as sqsm:
                for fqid in bundle_fqids:
                    sqsm.send(cmd_template.format(keys=f"bundles/{fqid}"))

        handle = Config.get_blobstore_handle(replica)
        with ThreadPoolExecutor(max_workers=4) as e:
            for c in set(hexdigits.lower()):
                bundle_fqids = Living(handle.list_v2(replica.bucket, f"bundles/{c}"))
                e.submit(forward_keys, bundle_fqids)
        monitor_logs(logs, job_id, start_time)
    else:
        for key in args.keys:
            msg = json.dumps(dict(action="record event", job_id=job_id, replica=replica.name, key=key))
            record_event_for_bundle(Replica[args.replica], key, (args.prefix,), use_version_for_timestamp=True)
            print(msg)
def mark_bundle_checkout_started(execution_id: str, replica: Replica, sts_bucket: str):
    handle = Config.get_blobstore_handle(replica)
    data = {_STATUS_KEY: "RUNNING"}
    handle.upload_file_handle(sts_bucket,
                              _bundle_checkout_status_key(execution_id),
                              io.BytesIO(json.dumps(data).encode("utf-8")))
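# A minimal sketch (an assumption, not part of the original module): reading the checkout status object
# back with the same blobstore handle used above. The helper name is hypothetical; the status key layout
# comes from _bundle_checkout_status_key.
def _example_get_bundle_checkout_status(execution_id: str, replica: Replica, sts_bucket: str) -> dict:
    handle = Config.get_blobstore_handle(replica)
    # handle.get returns the raw bytes of the status object written by mark_bundle_checkout_started
    return json.loads(handle.get(sts_bucket, _bundle_checkout_status_key(execution_id)).decode("utf-8"))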
def _test_bundle_delete(self, replica: Replica, fixtures_bucket: str, authorized: bool):
    schema = replica.storage_schema

    # prep existing bundle
    bundle_uuid = str(uuid.uuid4())
    file_uuid = str(uuid.uuid4())
    resp_obj = self.upload_file_wait(
        f"{schema}://{fixtures_bucket}/test_good_source_data/0",
        replica,
        file_uuid,
        bundle_uuid=bundle_uuid,
    )
    file_version = resp_obj.json['version']

    bundle_version = datetime_to_version_format(datetime.datetime.utcnow())
    self.put_bundle(
        replica,
        bundle_uuid,
        [(file_uuid, file_version, "LICENSE")],
        bundle_version,
    )

    handle = Config.get_blobstore_handle(replica)
    bucket = replica.bucket

    self.delete_bundle(replica, bundle_uuid, authorized=authorized)
    tombstone_exists = test_object_exists(handle, bucket, f"bundles/{bundle_uuid}.dead")
    self.assertEquals(tombstone_exists, authorized)

    self.delete_bundle(replica, bundle_uuid, bundle_version, authorized=authorized)
    tombstone_exists = test_object_exists(handle, bucket, f"bundles/{bundle_uuid}.{bundle_version}.dead")
    self.assertEquals(tombstone_exists, authorized)
def get_helper(uuid: str, replica: Replica, version: str = None):
    handle = Config.get_blobstore_handle(replica)
    bucket = replica.bucket

    if version is None:
        # list the files and find the one that is the most recent.
        prefix = "files/{}.".format(uuid)
        for matching_file in handle.list(bucket, prefix):
            matching_file = matching_file[len(prefix):]
            if version is None or matching_file > version:
                version = matching_file

    if version is None:
        # no matches!
        raise DSSException(404, "not_found", "Cannot find file!")

    # retrieve the file metadata.
    try:
        file_metadata = json.loads(
            handle.get(bucket, "files/{}.{}".format(uuid, version)).decode("utf-8"))
    except BlobNotFoundError:
        raise DSSException(404, "not_found", "Cannot find file!")

    blob_path = "blobs/" + ".".join((
        file_metadata[FileMetadata.SHA256],
        file_metadata[FileMetadata.SHA1],
        file_metadata[FileMetadata.S3_ETAG],
        file_metadata[FileMetadata.CRC32C],
    ))

    if request.method == "GET":
        """
        Probabilistically return "Retry-After" header.
        The retry-after interval can be relatively short now, but it sets up downstream libraries / users
        for success when we start integrating this with the checkout service.
        """
        if random.randint(0, 100) < REDIRECT_PROBABILITY_PERCENTS:
            response = redirect(request.url, code=301)
            headers = response.headers
            headers['Retry-After'] = RETRY_AFTER_INTERVAL
            return response

        response = redirect(handle.generate_presigned_GET_url(bucket, blob_path))
    else:
        response = make_response('', 200)

    headers = response.headers
    headers['X-DSS-BUNDLE-UUID'] = file_metadata[FileMetadata.BUNDLE_UUID]
    headers['X-DSS-CREATOR-UID'] = file_metadata[FileMetadata.CREATOR_UID]
    headers['X-DSS-VERSION'] = version
    headers['X-DSS-CONTENT-TYPE'] = file_metadata[FileMetadata.CONTENT_TYPE]
    headers['X-DSS-SIZE'] = file_metadata[FileMetadata.SIZE]
    headers['X-DSS-CRC32C'] = file_metadata[FileMetadata.CRC32C]
    headers['X-DSS-S3-ETAG'] = file_metadata[FileMetadata.S3_ETAG]
    headers['X-DSS-SHA1'] = file_metadata[FileMetadata.SHA1]
    headers['X-DSS-SHA256'] = file_metadata[FileMetadata.SHA256]

    return response
def _walk(self) -> None:
    """
    Subclasses should not typically implement this method, which includes logic specific to calling
    self.process_item(*args) on each blob visited.
    """
    start_time = time()
    handle = Config.get_blobstore_handle(Replica[self.replica])
    blobs = handle.list_v2(
        self.bucket,
        prefix=self.work_id,
        start_after_key=self.marker,  # type: ignore  # Cannot determine type of 'marker'
        token=self.token  # type: ignore  # Cannot determine type of 'token'
    )
    for key in blobs:
        if 250 < time() - start_time:
            break
        self.process_item(key)
        self.marker = blobs.start_after_key
        self.token = blobs.token
    else:
        self._status = WalkerStatus.finished.name
def _test_bundle_get_directaccess(self, replica: Replica):
    schema = replica.storage_schema

    bundle_uuid = "011c7340-9b3c-4d62-bf49-090d79daf198"
    version = "2017-06-20T214506.766634Z"

    url = str(UrlBuilder()
              .set(path="/v1/bundles/" + bundle_uuid)
              .add_query("replica", replica.name)
              .add_query("version", version)
              .add_query("directurls", "true"))

    with override_bucket_config(BucketConfig.TEST_FIXTURE):
        resp_obj = self.assertGetResponse(url, requests.codes.ok)

    url = resp_obj.json['bundle']['files'][0]['url']
    splitted = urllib.parse.urlparse(url)
    self.assertEqual(splitted.scheme, schema)
    bucket = splitted.netloc
    key = splitted.path[1:]  # ignore the / part of the path.

    handle = Config.get_blobstore_handle(replica)
    contents = handle.get(bucket, key)

    hasher = hashlib.sha1()
    hasher.update(contents)
    sha1 = hasher.hexdigest()
    self.assertEqual(sha1, "2b8b815229aa8a61e483fb4ba0588b8b6c491890")
def _configure_logging(test=False, **kwargs):
    root_logger = logging.getLogger()
    global _logging_configured
    if _logging_configured:
        root_logger.info(
            "Logging was already configured in this interpreter process. The currently "
            "registered handlers, formatters, filters and log levels will be left as is.")
    else:
        root_logger.setLevel(logging.WARNING)
        if 'AWS_LAMBDA_LOG_GROUP_NAME' in os.environ:
            # On AWS Lambda, we assume that its runtime already configured logging appropriately
            pass
        elif len(root_logger.handlers) == 0:
            logging.basicConfig(**kwargs)
        else:
            # If this happens, the process can likely proceed but the underlying issue needs to be
            # investigated. Some module isn't playing nicely and configured logging before we had a
            # chance to do so. The backtrace included in the log message may look scary but it should
            # aid in finding the culprit.
            root_logger.warning(
                "It appears that logging was already configured in this interpreter process. "
                "Currently registered handlers, formatters and filters will be left as is.",
                stack_info=True)
        debug = Config.debug_level()
        log_levels = main_log_levels
        if test:
            log_levels = {**log_levels, **test_log_levels}
        for logger, levels in log_levels.items():
            if isinstance(logger, (str, type(None))):
                logger = logging.getLogger(logger)
            level = levels[min(debug, len(levels) - 1)]
            logger.setLevel(level)
        _logging_configured = True
def create_app():
    app = DSSApp(
        __name__,
        validator_map={
            'body': DSSRequestBodyValidator,
            'parameter': DSSParameterValidator,
        },
    )
    # The Flask/Connexion app's logger has its own multi-line formatter and configuration. Rather than
    # suppressing it we let it do its thing, give it a special name and only enable it if DSS_DEBUG > 1.
    # Most of the DSS web app's logging is done through the DSSChaliceApp.app logger, not the Flask
    # app's logger.
    # app.app.logger_name = 'dss.api'
    debug = Config.debug_level() > 0
    app.app.debug = debug
    app.app.logger.info('Flask debug is %s.', 'enabled' if debug else 'disabled')

    resolver = RestyResolver("dss.api", collection_endpoint_name="list")
    app.add_api('../dss-api.yml', resolver=resolver, validate_responses=True, arguments=os.environ)
    app.add_error_handler(DSSException, dss_exception_handler)
    return app
def _verify_checkout(
        replica: Replica, token: typing.Optional[str], file_metadata: dict, blob_path: str,
) -> typing.Tuple[str, bool]:
    cloud_handle = Config.get_blobstore_handle(replica)
    hca_handle = Config.get_hcablobstore_handle(replica)

    try:
        now = datetime.datetime.now(datetime.timezone.utc)
        creation_date = cloud_handle.get_creation_date(replica.checkout_bucket, blob_path)
        stale_after_date = creation_date + datetime.timedelta(days=int(os.environ['DSS_BLOB_PUBLIC_TTL_DAYS']))
        expiration_date = (creation_date
                           + datetime.timedelta(days=int(os.environ['DSS_BLOB_TTL_DAYS']))
                           - datetime.timedelta(hours=1))

        if now < expiration_date:
            if now > stale_after_date:
                start_file_checkout(replica, blob_path)
            if hca_handle.verify_blob_checksum_from_dss_metadata(replica.checkout_bucket, blob_path, file_metadata):
                return "", True
            else:
                logger.error(f"Checksum verification failed for file {replica.checkout_bucket}/{blob_path}")
    except BlobNotFoundError:
        pass

    decoded_token: dict
    if token is None:
        execution_id = start_file_checkout(replica, blob_path)
        start_time = time.time()
        attempts = 0
        decoded_token = {
            CheckoutTokenKeys.EXECUTION_ID: execution_id,
            CheckoutTokenKeys.START_TIME: start_time,
            CheckoutTokenKeys.ATTEMPTS: attempts
        }
    else:
        try:
            decoded_token = json.loads(token)
            decoded_token[CheckoutTokenKeys.ATTEMPTS] += 1
        except (KeyError, ValueError) as ex:
            raise DSSException(requests.codes.bad_request, "illegal_token", "Could not understand token", ex)
    encoded_token = json.dumps(decoded_token)
    return encoded_token, False
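# A minimal caller sketch (an assumption, not part of the original module): polling _verify_checkout
# until the blob is available in the checkout bucket, threading the opaque token back in on each retry.
# The helper name, polling interval and iteration count are illustrative only.
def _example_wait_for_checkout(replica: Replica, file_metadata: dict, blob_path: str) -> None:
    token = None
    for _ in range(30):
        token, available = _verify_checkout(replica, token, file_metadata, blob_path)
        if available:
            return
        time.sleep(1)
    raise RuntimeError(f"Checkout of {blob_path} did not complete in time")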
def test_gcloud_retries(self):
    Config.get_native_handle.cache_clear()
    Config.BLOBSTORE_RETRIES = 1
    handle = Config.get_native_handle(Replica.gcp)
    for adapter in handle._http.adapters.values():
        self.assertEqual(Config.BLOBSTORE_RETRIES, adapter.max_retries.total)
def update_flashflood(prefix: str, number_of_updates_to_apply=1000) -> int:
    """
    Apply event updates to existing journals. This is typically called after journaling is complete.
    """
    ff = Config.get_flashflood_handle(prefix, confirm_writes=True)
    number_of_updates_applied = ff.update(number_of_updates_to_apply)
    return number_of_updates_applied
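# A minimal usage sketch (an assumption, not part of the original module): applying pending event
# updates for every flashflood prefix a replica writes to, as the event-scribe daemon or an operator
# script might do. The helper name is hypothetical.
def _example_apply_all_updates(replica: Replica) -> None:
    for prefix in replica.flashflood_prefix_write:
        applied = update_flashflood(prefix)
        logger.info("Applied %d flashflood update(s) to prefix %s", applied, prefix)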
def __init__(self, timeout: float = 60, delay: float = 10) -> None:
    elasticsearch_binary = os.getenv("DSS_TEST_ES_PATH", "elasticsearch")
    tempdir = tempfile.TemporaryDirectory()

    # Set Elasticsearch's initial and max heap to 1.6 GiB, 40% of what's available on Travis, according to
    # guidance from https://www.elastic.co/guide/en/elasticsearch/reference/current/heap-size.html
    env = dict(os.environ, ES_JAVA_OPTIONS="-Xms1638m -Xmx1638m")

    # Work around https://github.com/travis-ci/travis-ci/issues/8408
    if '_JAVA_OPTIONS' in env:  # no coverage
        logger.warning("_JAVA_OPTIONS is set. This may override the options just set via ES_JAVA_OPTIONS.")

    port = networking.unused_tcp_port()
    transport_port = networking.unused_tcp_port()

    args = [elasticsearch_binary,
            "-E", f"http.port={port}",
            "-E", f"transport.tcp.port={transport_port}",
            "-E", f"path.data={tempdir.name}",
            "-E", "logger.org.elasticsearch=" + ("info" if Config.debug_level() > 0 else "warn")]
    logger.info("Running %r with environment %r", args, env)
    proc = subprocess.Popen(args, env=env)

    def check():
        status = proc.poll()
        if status is not None:
            tempdir.cleanup()
            raise ChildProcessError(f"ES process died with status {status}")

    deadline = time.time() + timeout
    while True:
        check()
        time.sleep(delay)
        check()
        logger.info('Attempting to connect to ES instance at 127.0.0.1:%i', port)
        try:
            sock = socket.create_connection(("127.0.0.1", port), 1)
        except (ConnectionRefusedError, socket.timeout):
            logger.debug('Failed connecting to ES instance at 127.0.0.1:%i', port, exc_info=True)
            if time.time() + delay > deadline:
                proc.kill()
                tempdir.cleanup()
                raise
        else:
            sock.close()
            check()
            self.port = port
            self.proc = proc
            self.tempdir = tempdir
            break
def setUp(self):
    dss.Config.set_config(dss.BucketConfig.NORMAL)
    self.gs_bucket_name, self.s3_bucket_name = dss.Config.get_gs_bucket(), dss.Config.get_s3_bucket()
    self.logger = logging.getLogger(__name__)
    self.gs = Config.get_native_handle(Replica.gcp)
    self.gs_bucket = self.gs.bucket(self.gs_bucket_name)
    self.s3 = boto3.resource("s3")
    self.s3_bucket = self.s3.Bucket(self.s3_bucket_name)
def validate_dst_bucket(dst_bucket: str, replica: Replica) -> typing.Tuple[ValidationEnum, str]:
    if not Config.get_blobstore_handle(replica).check_bucket_exists(dst_bucket):
        return ValidationEnum.WRONG_DST_BUCKET, f"Bucket {dst_bucket} doesn't exist"
    if not touch_test_file(dst_bucket, replica):
        return ValidationEnum.WRONG_PERMISSIONS_DST_BUCKET, f"Insufficient permissions on bucket {dst_bucket}"
    return ValidationEnum.PASSED, None
def job_finalize(self):
    super().job_finalize()
    handle = Config.get_blobstore_handle(Replica[self.replica])
    listed_keys = handle.list(self.bucket, prefix=self.prefix)
    k_listed = sum(1 for _ in listed_keys)
    assert self.work_result == k_listed, f'Integration test failed: {self.work_result} != {k_listed}'
    logger.info(f"Integration test passed for {self.replica} with {k_listed} key(s) listed")
def _test_delete_event_for_bundle(self, replica, prefixes, key):
    ff = mock.MagicMock()
    with mock.patch("dss.events.Config.get_flashflood_handle", return_value=ff):
        events.delete_event_for_bundle(replica, key, prefixes)
        used_prefixes = prefixes or replica.flashflood_prefix_write
        self.assertEqual(len(used_prefixes), ff.delete_event.call_count)
        for args, pfx in zip(ff.call_args_list, used_prefixes):
            expected = ((resources.s3, Config.get_flashflood_bucket(), pfx),)
            self.assertEqual(args, expected)
def test_boto_timeout(self):
    Config.get_native_handle.cache_clear()
    Config.BLOBSTORE_CONNECT_TIMEOUT = 1
    Config.BLOBSTORE_READ_TIMEOUT = 2
    Config.BLOBSTORE_BOTO_RETRIES = 3
    client_config = Config.get_native_handle(Replica.aws)._client_config
    self.assertEqual(Config.BLOBSTORE_CONNECT_TIMEOUT, client_config.connect_timeout)
    self.assertEqual(Config.BLOBSTORE_READ_TIMEOUT, client_config.read_timeout)
    self.assertEqual(Config.BLOBSTORE_BOTO_RETRIES, client_config.retries['max_attempts'])
def put_status_succeeded(execution_id: str, dst_replica: Replica, dst_bucket: str, dst_location: str):
    handle = Config.get_blobstore_handle(Replica.aws)
    data = {
        "status": 'SUCCEEDED',
        "location": f"{dst_replica.storage_schema}://{dst_bucket}/{dst_location}"
    }
    handle.upload_file_handle(Replica.aws.checkout_bucket,
                              status_file_name(execution_id),
                              io.BytesIO(json.dumps(data).encode("utf-8")))
def _test_record_event_for_bundle(self, replica, prefixes, metadata_document, key):
    with mock.patch("dss.events.build_bundle_metadata_document", return_value=metadata_document):
        ff = mock.MagicMock()
        ff.event_exists = mock.MagicMock(return_value=False)
        with mock.patch("dss.events.Config.get_flashflood_handle", return_value=ff):
            ret = events.record_event_for_bundle(replica, key, prefixes)
            used_prefixes = prefixes or replica.flashflood_prefix_write
            self.assertEqual(len(used_prefixes), ff.put.call_count)
            self.assertEqual(metadata_document, ret)
            for args, pfx in zip(ff.call_args_list, used_prefixes):
                expected = ((resources.s3, Config.get_flashflood_bucket(), pfx),)
                self.assertEqual(args, expected)
def _walk(self) -> None:
    executor = ThreadPoolExecutor(len(DEFAULT_BACKENDS))
    # We can't use executor as context manager because we don't want shutting it down to block
    try:
        remaining_backend_time = AdjustedRemainingTime(actual=self._remaining_time,
                                                       offset=-self.shutdown_time)
        backend = CompositeIndexBackend(executor=executor,
                                        backends=DEFAULT_BACKENDS,
                                        remaining_time=remaining_backend_time,
                                        dryrun=self.dryrun,
                                        notify=self.notify)
        replica = Replica[self.replica]
        indexer_cls = Indexer.for_replica(replica)
        indexer = indexer_cls(backend, remaining_backend_time)

        handle = Config.get_blobstore_handle(replica)
        if self.bucket != replica.bucket:
            logger.warning(f'Indexing bucket {self.bucket} instead of default {replica.bucket}.')

        blobs: PagedIter = handle.list_v2(self.bucket,
                                          prefix=f'bundles/{self.work_id}',
                                          start_after_key=self.marker,
                                          token=self.token)

        for key in blobs:
            # Timing out while recording paging info could cause an inconsistent paging state, leading to
            # repeats of large amounts of work. This can be avoided by checking for timeouts only during
            # actual re-indexing. The indexer performs this check for every item.
            self.work_result['processed'] += 1
            try:
                indexer.index_object(key)
            except IndexerTimeout as e:
                self.work_result['failed'] += 1
                logger.warning(f'{self.work_id} timed out during index visitation: {e}')
                break
            except Exception:
                self.work_result['failed'] += 1
                logger.warning(f'Index visitation failed for {key}', exc_info=True)
            else:
                self.work_result['indexed'] += 1
            self.marker = blobs.start_after_key
            self.token = blobs.token
        else:
            self._status = WalkerStatus.finished.name
    finally:
        executor.shutdown(False)
def _test_file_put_cached(self, replica: Replica, scheme: str, test_bucket: str, test_checkout_bucket: str,
                          uploader: Uploader):
    stored_cache_criteria = os.environ.get('CHECKOUT_CACHE_CRITERIA')
    try:
        os.environ['CHECKOUT_CACHE_CRITERIA'] = '[{"type":"application/json","max_size":12314}]'
        handle = Config.get_blobstore_handle(replica)
        src_key = generate_test_key()
        src_data = b'{"status":"valid"}'
        source_url = f"{scheme}://{test_bucket}/{src_key}"
        file_uuid = str(uuid.uuid4())
        bundle_uuid = str(uuid.uuid4())
        version = datetime_to_version_format(datetime.datetime.utcnow())

        # write dummy file and upload to upload area
        with tempfile.NamedTemporaryFile(delete=True) as fh:
            fh.write(src_data)
            fh.flush()
            uploader.checksum_and_upload_file(fh.name, src_key, "application/json")

        # upload file to DSS
        self.upload_file(source_url, file_uuid, bundle_uuid=bundle_uuid, version=version)

        metadata = handle.get_user_metadata(test_bucket, src_key)
        dst_key = ("blobs/" + ".".join([metadata['hca-dss-sha256'],
                                        metadata['hca-dss-sha1'],
                                        metadata['hca-dss-s3_etag'],
                                        metadata['hca-dss-crc32c']])).lower()

        for wait_to_upload_into_checkout_bucket in range(30):
            try:
                # get uploaded blob key from the checkout bucket
                file_metadata = json.loads(handle.get(test_checkout_bucket, dst_key).decode("utf-8"))
                break
            except BlobNotFoundError:
                time.sleep(1)
        else:
            file_metadata = json.loads(handle.get(test_checkout_bucket, dst_key).decode("utf-8"))
        assert file_metadata["status"] == "valid"  # the file exists in the checkout bucket
    finally:
        os.environ['CHECKOUT_CACHE_CRITERIA'] = stored_cache_criteria
def _list_checkout_bundle(
        replica: Replica,
        bundle_uuid: str,
        bundle_version: typing.Optional[str],
) -> typing.List[typing.Tuple[str, dict]]:
    """
    Lists the contents of a bundle in checkout.
    :param replica: Cloud replica
    :param bundle_uuid: Bundle UUID
    :param bundle_version: Bundle version
    :return: List of checkout bundle contents
    """
    handle = Config.get_blobstore_handle(replica)
    prefix = get_dst_bundle_prefix(bundle_uuid, bundle_version)
    return list(handle.list_v2(replica.checkout_bucket, prefix))
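# A minimal sketch (an assumption, not part of the original module): using _list_checkout_bundle to
# decide whether every file in a bundle manifest has landed in the checkout bucket. The helper name is
# hypothetical, and the manifest shape (a dict with a 'files' list) mirrors the bundle metadata used
# elsewhere in this codebase.
def _example_checkout_complete(replica: Replica, bundle_uuid: str, bundle_version: str,
                               bundle_manifest: dict) -> bool:
    checked_out = _list_checkout_bundle(replica, bundle_uuid, bundle_version)
    # one key per file is expected under the bundle's checkout prefix
    return len(checked_out) >= len(bundle_manifest['files'])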
def setUp(self):
    self.remaining_time = SpecificRemainingTime(10)
    Config.set_config(BucketConfig.TEST)
    self.s3_test_fixtures_bucket = get_env("DSS_S3_BUCKET_TEST_FIXTURES")
    self.gs_test_fixtures_bucket = get_env("DSS_GS_BUCKET_TEST_FIXTURES")
    self.s3_test_bucket = get_env("DSS_S3_BUCKET_TEST")
    self.gs_test_bucket = get_env("DSS_GS_BUCKET_TEST")

    class VT(Visitation):
        def walker_walk(self):
            pass

    registered_visitations.registered_visitations['VT'] = VT

    self.job_state = {
        '_visitation_class_name': 'VT',
        'work_ids': ['1', '2', '3', '4'],
        '_number_of_workers': 3,
    }

    self.walker_state = {
        '_visitation_class_name': 'VT',
        'work_ids': [['1', '2'], ['3', '4']],
    }