def test_use_w_settings(self):
    """Verify ``settings.use`` applies overrides only inside its context.

    Global settings must stay untouched both during and after the
    context; ``settings.get()`` reflects the merged overrides only
    while the context is active.
    """
    update_settings = {
        'swift:upload': {
            'skip_identical': True,
            'object_threads': 30
        },
        'swift:download': {
            'object_threads': 20
        }
    }
    # The overrides merged on top of the test defaults.
    expected_settings = {
        'swift:download': {
            'container_threads': 10,
            'object_threads': 20,
            'shuffle': True,
            'skip_identical': True
        },
        'swift:upload': {
            'changed': False,
            'checksum': True,
            'leave_segments': True,
            'object_threads': 30,
            'segment_size': 1073741824,
            'segment_threads': 10,
            'skip_identical': True,
            'use_slo': True
        }
    }
    self.assertEqual(settings._global_settings, test_settings)
    with settings.use(update_settings):
        # Globals are untouched; only the thread-local view changes.
        self.assertEqual(settings._global_settings, test_settings)
        self.assertEqual(settings.get(), expected_settings)
    # After exiting the context, everything is back to the defaults.
    self.assertEqual(settings._global_settings, test_settings)
    self.assertEqual(settings.get(), test_settings)
def test_condition_failures(self):
    """Verify list() retries on a failed condition and raises, and that
    a satisfiable condition passes without error."""
    num_test_objs = 20
    test_obj_size = 100
    test_dir = self.test_container / 'test'
    with NamedTemporaryDirectory(change_dir=True) as tmp_d:
        self.create_dataset(tmp_d, num_test_objs, test_obj_size)
        Path('.').copytree(test_dir)

    # Verify a ConditionNotMet exception is thrown when attempting to list
    # a file that hasn't been uploaded (num_test_objs + 1 names expected,
    # but only num_test_objs were created).
    expected_objs = {
        test_dir / which_obj
        for which_obj in self.get_dataset_obj_names(num_test_objs + 1)
    }
    num_retries = settings.get()['swift']['num_retries']
    with mock.patch('time.sleep') as mock_sleep:
        with self.assertRaises(swift.ConditionNotMetError):
            test_dir.list(condition=lambda results: expected_objs == set(results))
        # One sleep per retry proves the retry loop actually ran.
        self.assertGreater(num_retries, 0)
        self.assertEqual(len(mock_sleep.call_args_list), num_retries)

    # Verify that the condition passes when excluding the non-extant file
    expected_objs = {
        test_dir / which_obj
        for which_obj in self.get_dataset_obj_names(num_test_objs)
    }
    objs = test_dir.list(condition=lambda results: expected_objs == set(results))
    self.assertEqual(expected_objs, set(objs))
def test_env_vars_loaded(self):
    """Verify swift credentials are picked up from environment variables
    when settings are (re)initialized."""
    settings._initialize()
    initial_settings = settings.get()['swift']
    self.assertEqual(initial_settings['username'], 'test_username')
    self.assertEqual(initial_settings['password'], 'test_password')
    self.assertEqual(initial_settings['num_retries'], 2)
    self.assertEqual(initial_settings['auth_url'], 'http://test_auth_url.com')
def _wrap_dx_calls():
    """Configure dxpy auth from settings and normalize dxpy errors.

    Sets the dxpy security context from the ``dx.auth_token`` setting
    (when one is configured), then yields; any ``DXError`` raised inside
    the context is re-raised as a descriptive ``DNAnexusError`` subclass
    with the original exception chained as its cause.
    """
    token = settings.get()['dx']['auth_token']
    if token:  # pragma: no cover
        dxpy.set_security_context(
            {'auth_token_type': 'Bearer', 'auth_token': token})
    try:
        yield
    except DXError as exc:
        six.raise_from(_dx_error_to_descriptive_exception(exc), exc)
def _get_s3_client():
    """Return the thread-local boto3 S3 client, creating it on first use.

    A separate client is kept per thread/process because boto3 clients
    are not thread-safe.

    Returns:
        boto3.Client: An instance of the S3 client.
    """
    if not hasattr(_thread_local, 's3_client'):
        # Only forward settings that are actually set, so Boto3's own
        # default credential/config lookup behavior is not overridden.
        kwargs = {k: v for k, v in settings.get()['s3'].items() if v}
        _thread_local.s3_client = boto3.session.Session(**kwargs).client('s3')
    return _thread_local.s3_client
def test_use_nested_w_update(self):
    """Verify nested ``settings.use`` contexts stack and unwind correctly
    while ``settings.update`` only touches the global settings."""
    settings.update({'foo': 0})
    self.assertEqual(settings.get(), {'foo': 0})
    with settings.use({'foo': 1}):
        self.assertEqual(settings.get(), {'foo': 1})
        self.assertEqual(settings._global_settings, {'foo': 0})
        with settings.use({'foo': 2}):
            self.assertEqual(settings.get(), {'foo': 2})
            self.assertEqual(settings._global_settings, {'foo': 0})
        # Inner context popped; outer override is visible again.
        self.assertEqual(settings.get(), {'foo': 1})
        self.assertEqual(settings._global_settings, {'foo': 0})
    self.assertEqual(settings.get(), {'foo': 0})
    # All contexts exited: no thread-local settings should remain.
    self.assertFalse(hasattr(settings.thread_local, 'settings'))
    settings.update({'foo': 3})
    self.assertEqual(settings.get(), {'foo': 3})
def upload(self, source, condition=None, use_manifest=False, headers=None,
           **kwargs):
    """Uploads a list of files and directories to s3.

    Note that the S3Path is treated as a directory.

    Note that for user-provided OBSUploadObjects, an empty directory's
    destination must have a trailing slash.

    Args:
        source (List[str|OBSUploadObject]): A list of source files,
            directories, and OBSUploadObjects to upload to S3.
        condition (function(results) -> bool): The method will only return
            when the results of upload matches the condition.
        use_manifest (bool): Generate a data manifest and validate the
            upload results are in the manifest.
        headers (dict): A dictionary of object headers to apply to the
            object. Headers will not be applied to OBSUploadObjects and
            any headers specified by an OBSUploadObject will override
            these headers. Headers should be specified as key-value
            pairs, e.g. {'ContentLanguage': 'en'}

    Returns:
        List[S3Path]: A list of the uploaded files as S3Paths.

    Raises:
        ValueError: If ``use_manifest`` is True and ``source`` is not a
            single directory, or if ``condition`` is not callable.
        FailedUploadError: If any individual object upload reported
            failure.

    Notes:
    - This method uploads to paths relative to the current directory.
    """
    # A manifest describes the contents of exactly one directory, so
    # reject any other source shape up front.
    if use_manifest and not (len(source) == 1 and os.path.isdir(source[0])):
        raise ValueError(
            'can only upload one directory with use_manifest=True')
    utils.validate_condition(condition)

    # Expand plain file/dir names on disk; user-provided OBSUploadObjects
    # are passed through untouched (their headers/destinations win).
    files_to_convert = utils.walk_files_and_dirs(
        [name for name in source if not isinstance(name, OBSUploadObject)])
    files_to_upload = [
        obj for obj in source if isinstance(obj, OBSUploadObject)
    ]

    manifest_file_name = (Path(source[0]) / utils.DATA_MANIFEST_FILE_NAME
                          if use_manifest else None)
    resource_base = self.resource or Path('')
    # Directories get a trailing slash on their object name; the manifest
    # file itself is excluded here because it is uploaded separately below.
    files_to_upload.extend([
        OBSUploadObject(
            name,
            resource_base / (utils.with_trailing_slash(
                utils.file_name_to_object_name(name))
                if Path(name).isdir()
                else utils.file_name_to_object_name(name)),
            options={'headers': headers} if headers else None)
        for name in files_to_convert if name != manifest_file_name
    ])

    if use_manifest:
        # Generate the data manifest and save it remotely
        object_names = [o.object_name for o in files_to_upload]
        utils.generate_and_save_data_manifest(source[0], object_names)
        manifest_obj_name = resource_base / utils.file_name_to_object_name(
            manifest_file_name)
        manifest_obj = OBSUploadObject(
            str(manifest_file_name),
            manifest_obj_name,
            options={'headers': headers} if headers else None)
        self._upload_object(manifest_obj)

        # Make a condition for validating the upload
        manifest_cond = partial(utils.validate_manifest_list, object_names)
        condition = (utils.join_conditions(condition, manifest_cond)
                     if condition else manifest_cond)

    options = settings.get()['s3:upload']
    segment_size = utils.str_to_bytes(options.get('segment_size'))
    # Multipart transfer tuning forwarded to the boto3 transfer layer.
    transfer_config = {
        'multipart_threshold': segment_size,
        'max_concurrency': options.get('segment_threads'),
        'multipart_chunksize': segment_size
    }
    upload_w_config = partial(self._upload_object, config=transfer_config)

    uploaded = {'completed': [], 'failed': []}
    with S3UploadLogger(len(files_to_upload)) as ul:
        pool = ThreadPool(options['object_threads'])
        try:
            result_iter = pool.imap_unordered(upload_w_config,
                                              files_to_upload)
            while True:
                try:
                    # Large timeout on next() so the blocking wait stays
                    # interruptible (e.g. by Ctrl-C) — presumably why a
                    # plain for-loop over the iterator is not used.
                    result = result_iter.next(0xFFFF)
                    if result['success']:
                        ul.add_result(result)
                        uploaded['completed'].append(result)
                    else:
                        uploaded['failed'].append(result)
                except StopIteration:
                    break
            pool.close()
        except BaseException:
            # On any error (including KeyboardInterrupt), kill outstanding
            # work instead of waiting for it.
            pool.terminate()
            raise
        finally:
            pool.join()

    if uploaded['failed']:
        raise exceptions.FailedUploadError(
            'an error occurred while uploading', uploaded)

    utils.check_condition(condition,
                          [r['dest'] for r in uploaded['completed']])
    return uploaded
def download(self, dest, condition=None, use_manifest=False, **kwargs):
    """Downloads a directory from S3 to a destination directory.

    Args:
        dest (str): The destination path to download file to. If downloading
            to a directory, there must be a trailing slash. The directory
            will be created if it doesn't exist.
        condition (function(results) -> bool): The method will only return
            when the results of download matches the condition.
        use_manifest (bool): Validate the downloaded results against the
            remote data manifest.

    Returns:
        List[S3Path]: A list of the downloaded objects.

    Notes:
    - The destination directory will be created automatically if it doesn't
      exist.
    - This method downloads to paths relative to the current directory.
    """
    utils.validate_condition(condition)

    if use_manifest:
        # The manifest lists every object name that must be present in the
        # download results; fold that check into the user's condition.
        object_names = utils.get_data_manifest_contents(self)
        manifest_cond = partial(utils.validate_manifest_list, object_names)
        condition = (utils.join_conditions(condition, manifest_cond)
                     if condition else manifest_cond)

    source = utils.with_trailing_slash(self)
    files_to_download = [{
        'source': file,
        'dest': dest
    } for file in source.list()]

    options = settings.get()['s3:download']
    segment_size = utils.str_to_bytes(options.get('segment_size'))
    # Multipart transfer tuning forwarded to the boto3 transfer layer.
    transfer_config = {
        'multipart_threshold': segment_size,
        'max_concurrency': options.get('segment_threads'),
        'multipart_chunksize': segment_size
    }
    download_w_config = partial(self._download_object_worker,
                                config=transfer_config)

    downloaded = {'completed': [], 'failed': []}
    with S3DownloadLogger(len(files_to_download)) as dl:
        pool = ThreadPool(options['object_threads'])
        try:
            result_iter = pool.imap_unordered(download_w_config,
                                              files_to_download)
            while True:
                try:
                    # Large timeout on next() so the blocking wait stays
                    # interruptible (e.g. by Ctrl-C) — presumably why a
                    # plain for-loop over the iterator is not used.
                    result = result_iter.next(0xFFFF)
                    if result['success']:
                        dl.add_result(result)
                        downloaded['completed'].append(result)
                    else:
                        downloaded['failed'].append(result)
                except StopIteration:
                    break
            pool.close()
        except BaseException:
            # On any error (including KeyboardInterrupt), kill outstanding
            # work instead of waiting for it.
            pool.terminate()
            raise
        finally:
            pool.join()

    if downloaded['failed']:
        raise exceptions.FailedDownloadError(
            'an error occurred while downloading', downloaded)

    utils.check_condition(condition,
                          [r['source'] for r in downloaded['completed']])
    return downloaded
def _use_contextmanager(self, value):
    """Helper run on a worker thread: enter a settings context, sleep so
    other threads' contexts overlap, and verify thread isolation."""
    with settings.use({'foo': value}):
        time.sleep(.01)
        # Other threads using different values must not bleed into this
        # thread's view of the settings.
        self.assertEqual(settings.get(), {'foo': value})
        time.sleep(.01)
def test_get(self):
    """Verify settings.get() returns the configured test defaults."""
    self.assertEqual(settings.get(), test_settings)