def setup_module():
    """Set up the module for testing.

    Starts the moto S3 mock, points the test/dataset caches at a temporary
    testing directory, copies the test config into it, and seeds a mocked
    S3 bucket with two dataset metadata files.
    """
    mock.start()
    # set up test cache and alter the dataset module's cache to point to the test cache
    # must ensure exists so we can copy the config.yml into it
    test_cache.path = test_dir / '.testing'
    test_cache.ensure_exists()
    dataset_cache.path = test_cache.path / Path('datasets')
    # copy config file from test data into temporary testing cache
    copyfile(test_data_dir / Path('config.yml'), test_cache.path / Path('config.yml'))
    config = get_config()
    S3 = boto3.resource('s3', region_name='us-east-1')
    # We need to create the bucket since this is all in Moto's 'virtual' AWS account
    S3.create_bucket(Bucket=config['dataset_bucket_name'])
    bucket = S3.Bucket(config['dataset_bucket_name'])
    # use context managers so the metadata file handles are closed;
    # the originals passed bare open() objects and leaked them
    with open(test_data_dir / Path('test_metadata_1.json'), 'rb') as metadata_1:
        bucket.put_object(Key='test_dataset_1/metadata.json', Body=metadata_1)
    with open(test_data_dir / Path('test_metadata_2.json'), 'rb') as metadata_2:
        bucket.put_object(Key='test_dataset_2/metadata.json', Body=metadata_2)
def process_result(ctx: click.Context, result: CreateOutput, config: str):
    """Processes output of dataset creation

    Args:
        ctx (Context): click context object
        result (CreateOutput): result of dataset creation plugin
        config (str): original config provided by user

    Returns:
        result (CreateOutput): result of dataset creation plugin
    """
    if result is None:
        return result

    # pull dataset information off the CreateInput stored on the context
    create_input = ctx.obj
    name = create_input.metadata['dataset_name']
    local_path = create_input.dataset_path / name

    # push the finished dataset up to S3 when requested
    if create_input.upload:
        bucket_name = get_config()["dataset_bucket_name"]
        cli_spinner("Uploading dataset to S3...", upload_directory,
                    bucket_name=bucket_name, prefix=name, local_path=local_path)

    # optionally remove the local copy once processing is done
    if create_input.delete_local:
        cli_spinner("Deleting " + name + " dataset...", shutil.rmtree, local_path)

    return result
def get_imageset_names() -> list:
    """Retrieves the names of all available imagesets in bucket pointed to
    by global config.

    Returns:
        list: imageset names
    """
    bucket_name = get_config()[BUCKET_FIELD]
    return list_top_level_bucket_prefixes(bucket_name)
def upload_dict_to_s3_as_json(s3_path: str, obj: dict):
    """Uploads given dictionary to model bucket on S3.

    Args:
        s3_path (str): full s3 path to save dictionary to, (no .json)
        obj (dict): dictionary to save
    """
    config = get_config()
    model_bucket = boto3.resource('s3').Bucket(config['model_bucket_name'])
    # serialize with indentation so the stored JSON is human-readable
    serialized = json.dumps(obj, indent=2)
    model_bucket.put_object(Body=serialized, Key=s3_path + '.json')
def _ensure_dataset(name: str):
    """Ensures dataset exists.

    Args:
        name (str): name of dataset

    Raises:
        ValueError: if dataset name is invalid (no matching objects in S3 bucket)
    """
    bucket_name = get_config()[BUCKET_FIELD]
    downloaded = download_prefix(bucket_name, name, dataset_cache)
    if not downloaded:
        raise ValueError(name)
def upload_file_to_s3(prefix: str, file_path: Path, alternate_name=None):
    """Uploads file at given file path to model bucket on S3.

    Args:
        prefix (str): prefix for filename on S3
        file_path (Path): path to file
        alternate_name (str, optional): name to override local file name
    """
    config = get_config()
    model_bucket = boto3.resource('s3').Bucket(config['model_bucket_name'])
    # default to the local filename unless an override was supplied
    if alternate_name is None:
        filename = file_path.name
    else:
        filename = alternate_name
    model_bucket.upload_file(str(file_path), prefix + '/' + filename)
def download_imagesets(self, imageset_list):
    """Util for downloading all imagesets needed for imageset creation.

    Args:
        imageset_list (list): list of imageset names needed
    """
    # Get image bucket name
    config = get_config()
    image_bucket_name = config.get('image_bucket_name')

    # cache subdirectory all imagesets are downloaded into; this is
    # loop-invariant, so create it once up front instead of every iteration
    imageset_path = 'imagesets/'
    self.imageset_cache.ensure_subpath_exists(imageset_path)

    # Downloads each imageset and appends local path to 'self.imageset_paths'
    for imageset in imageset_list:
        download_prefix(image_bucket_name, imageset, self.imageset_cache, imageset_path)
        self.imageset_paths.append(self.imageset_cache.path / 'imagesets' / imageset)
def _ensure_metadata(name: str):
    """Ensure imageset metadata exists.

    NOTE: This function works around the fact that we don't have imageset wide
    metadata files for all imagesets. In that case, it picks a random metadata
    file and reports the fields contained within.

    Args:
        name (str): name of imageset

    Raises:
        ClientError: If the given imageset name does not exist in the S3 bucket.
        StopIteration: If the given imageset does not have any metadata files
            named according to the standard scheme.
    """
    S3 = boto3.resource('s3')
    config = get_config()
    image_bucket = S3.Bucket(config[BUCKET_FIELD])
    cache_metadata_path = Path(
        name
    ) / 'metadata.json'  # relative path inside the cache where metadata will go
    # only touch S3 when the metadata is not already cached locally
    if not imageset_cache.subpath_exists(cache_metadata_path):
        imageset_cache.ensure_subpath_exists(name)
        # attempt to download set-wide metadata.json
        imageset_bucket_metadata_key = f'{name}/metadata.json'
        metadata_download_absolute_path = imageset_cache.path / cache_metadata_path
        try:
            # attempt to grab imageset-wide metadata
            image_bucket.download_file(imageset_bucket_metadata_key,
                                       str(metadata_download_absolute_path))
        except ClientError as e:
            # fallback to grabbing a single image metadata file (better than nothing)
            prefix = f'{name}/meta_'
            # get all items in bucket with this prefix, but limit results to 1
            image_metadata_key_collection = image_bucket.objects.filter(
                Delimiter='/', Prefix=prefix).limit(1)
            # filter() returns a collection iterable, which we must convert to an
            # iterator (generator) and call next on
            try:
                image_metadata_key = next(
                    iter(image_metadata_key_collection)).key
                image_bucket.download_file(
                    image_metadata_key, str(metadata_download_absolute_path))
            # explicitly reraise these errors for verbosity
            except ClientError as e:
                raise
            except StopIteration as e:
                raise
def test_update_no_existing_config_with_options():
    """Basically the same as test_update, except we clean the existing
    configuration file away before calling it. Ensures the command properly
    creates a new config file.
    """
    # save the current config so it can be restored after the test
    saved_config = get_config()
    test_cache.clean()

    result = runner.invoke(config_cmd_group, [
        'update', '-d', 'test_data_buck', '-m', 'test_art_buck',
        '-i', 'test_img_buck'
    ])
    assert result.exit_code == 0

    # compare the freshly-written config file against the expected contents
    with open(test_data_dir / Path('config_update_contents.txt'), 'r') as expected_file:
        expected = expected_file.read()
    with open(test_cache.path / Path('config.yml'), 'r') as actual_file:
        actual = actual_file.read()
    assert actual == expected

    # restore the configuration saved before the test ran
    update_config(saved_config)
def list_top_level_bucket_prefixes(bucket_name: str):
    """Lists all top level prefixes in an S3 bucket.

    A top level prefix means it is the first in the chain. This will not list
    any subprefixes. Ex: Bucket contains an element a/b/c/d.json, this function
    will only list a.

    Args:
        bucket_name (str): name of S3 bucket

    Returns:
        list: prefix strings
    """
    S3 = boto3.resource('s3')
    config = get_config()
    listing = S3.meta.client.list_objects(Bucket=bucket_name, Delimiter='/')
    common_prefixes = listing.get('CommonPrefixes')
    if common_prefixes is None:
        return []
    # strip the trailing '/' delimiter from each returned prefix
    return [entry.get('Prefix')[:-1] for entry in common_prefixes]
def clean(all: bool):
    """Cleans locally saved ravenml cache files.

    Args:
        all (bool): T/F whether to clean all files from cache, including
            configuration YAML, default false
    """
    # NOTE: 'all' shadows the builtin but is the CLI-facing option name
    if all:
        if not cache.clean():
            click.echo(Fore.RED + 'No cache to clean.')
        return

    try:
        # preserve the config across the clean: read it, wipe the cache,
        # then write the config back
        saved_config = get_config()
        cache.clean()
        update_config(saved_config)
    except ValueError:
        # config file exists but is malformed; remove it with the cache
        click.echo(Fore.RED + 'Bad configuration file. Deleting alongside cache.')
        cache.clean()
    except FileNotFoundError:
        # no config file at all; just clean whatever cache exists
        if not cache.clean():
            click.echo(Fore.RED + 'No cache to clean.')
def _ensure_metadata(name: str):
    """Ensure dataset metadata exists.

    Args:
        name (str): name of dataset

    Raises:
        ValueError: if dataset name is invalid and metadata cannot be downloaded.
    """
    S3 = boto3.resource('s3')
    config = get_config()
    DATASET_BUCKET = S3.Bucket(config['dataset_bucket_name'])
    relative_path = Path(name) / 'metadata.json'

    # nothing to do when the metadata is already cached locally
    if dataset_cache.subpath_exists(relative_path):
        return

    dataset_cache.ensure_subpath_exists(name)
    key = f'{name}/metadata.json'
    destination = dataset_cache.path / relative_path
    try:
        DATASET_BUCKET.download_file(key, str(destination))
    except ClientError as e:
        # a missing object in the bucket means the dataset name is bad
        raise ValueError(name) from e
def show(ctx: click.Context):
    """Show current config.

    Args:
        ctx (Context): click context object
    """
    try:
        # try and load the configuration
        config = get_config()
        for key, value in config.items():
            # click.echo rather than bare print, for consistency with every
            # other output statement in this module
            click.echo(Fore.GREEN + key + ': ' + Fore.WHITE + value)
    except FileNotFoundError:
        # thrown when no configuration file is found
        click.echo(Fore.RED + 'No configuration found.')
        if user_confirms('Would you like to make a new configuration?', default=True):
            # flag so update knows it was invoked from show
            ctx.obj['from_show'] = True
            ctx.invoke(update)
    except ValueError:
        # thrown when current configuration file is invalid
        click.echo(Fore.RED + 'Current configuration file is invalid.')
        if user_confirms('Would you like to fix it?', default=True):
            # flag so update knows it was invoked from show
            ctx.obj['from_show'] = True
            ctx.invoke(update)
def update(ctx: click.Context, image_bucket: str, dataset_bucket: str, model_bucket: str):
    """Update current config.

    Args:
        ctx (Context): click context object
        image_bucket (str): image bucket name. None if not passed as an option
        dataset_bucket (str): dataset bucket name. None if not passed as an option
        model_bucket (str): model bucket name. None if not passed as an option
    """
    config = {}
    # 0 = config loaded OK, 1 = no config file, 2 = invalid config file
    load_result = 0
    try:
        # try and load the configuration
        config = get_config()
    except FileNotFoundError:
        # thrown when no configuration file is found
        # checks to see if flag is set to indicate we arrived here from show
        if not ctx.obj['from_show']:
            click.echo(
                Fore.RED +
                'No configuration found to update. A new one will be created.')
        load_result = 1
    except ValueError:
        # thrown when current configuration file is invalid
        # checks to see if flag is set to indicate we arrived here from show
        if not ctx.obj['from_show']:
            click.echo(
                Fore.RED +
                'Current configuration is invalid. A new one will be created.')
        load_result = 2

    # map options into dict
    user_options = {
        'image_bucket_name': image_bucket,
        'dataset_bucket_name': dataset_bucket,
        'model_bucket_name': model_bucket,
    }

    # if options were passed, just update those options
    if image_bucket or dataset_bucket or model_bucket:
        # only use options if existing config was valid or a new config is being created
        if load_result == 0 or load_result == 1:
            for field, value in user_options.items():
                if value:
                    config[field] = value
        else:
            click.echo(
                Fore.RED +
                "Passed options ignored since current configuration is invalid."
            )
    # otherwise iterate over each field
    else:
        for field in CONFIG_FIELDS:
            # only make field editing optional if existing config was loaded successfully
            if load_result == 0:
                if user_confirms('Edit ' + field + '?'):
                    config[field] = user_input(field + ':', default=config[field])
            else:
                config[field] = user_input(field + ':')

    # save updates
    update_config(config)