Example #1
def setup_module():
    """ Sets up the module for testing.
    """
    # start the moto mock so subsequent boto3 calls hit a simulated AWS account
    mock.start()

    # set up test cache and alter the dataset module's cache to point to the test cache
    # must ensure exists so we can copy the config.yml into it
    test_cache.path = test_dir / '.testing'
    test_cache.ensure_exists()
    dataset_cache.path = test_cache.path / Path('datasets')

    # copy config file from test data into the temporary test cache
    copyfile(test_data_dir / Path('config.yml'),
             test_cache.path / Path('config.yml'))

    config = get_config()
    S3 = boto3.resource('s3', region_name='us-east-1')
    # We need to create the bucket since this is all in Moto's 'virtual' AWS account
    S3.create_bucket(Bucket=config['dataset_bucket_name'])
    bucket = S3.Bucket(config['dataset_bucket_name'])
    bucket.put_object(Key='test_dataset_1/metadata.json',
                      Body=(test_data_dir / Path('test_metadata_1.json')).read_bytes())
    bucket.put_object(Key='test_dataset_2/metadata.json',
                      Body=(test_data_dir / Path('test_metadata_2.json')).read_bytes())
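The mock object started above is defined outside this snippet; since the comments reference Moto's virtual AWS account, it is presumably a module-level moto mock. A minimal sketch of that assumption (moto 5.x renamed mock_s3 to mock_aws):

import moto

# assumed module-level S3 mock that setup_module() starts;
# on moto >= 5 this would be moto.mock_aws() instead
mock = moto.mock_s3()

def teardown_module():
    """Tears down the module after testing by stopping the S3 mock."""
    mock.stop()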
Example #2
def process_result(ctx: click.Context, result: CreateOutput, config: str):
    """Processes output of dataset creation
    
    Args:
        ctx (Context): click context object
        result (CreateOutput): result of dataset creation plugin
        config (str): original config provided by user
    Returns:
        result (CreateOutput): result of dataset creation plugin
    """
    if result is not None:
        # Gets dataset information from CreateInput
        ci = ctx.obj
        dataset_name = ci.metadata['dataset_name']
        dataset_path = ci.dataset_path / dataset_name

        # Uploads dataset to S3
        if ci.upload:
            bucketConfig = get_config()
            bucket = bucketConfig["dataset_bucket_name"]
            cli_spinner("Uploading dataset to S3...",
                        upload_directory,
                        bucket_name=bucket,
                        prefix=dataset_name,
                        local_path=dataset_path)

        # Deletes local dataset
        if ci.delete_local:
            cli_spinner("Deleting " + dataset_name + " dataset...",
                        shutil.rmtree, dataset_path)

    return result
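process_result receives the return value of whichever subcommand ran, which is the shape of a click result callback. A hedged sketch of how it might be wired to a hypothetical create group (on click < 8 the decorator is spelled resultcallback):

import click

@click.group()
@click.option('--config', type=str, default=None)
@click.pass_context
def create(ctx: click.Context, config: str):
    """Hypothetical dataset-creation command group."""
    ctx.obj = ...  # a CreateInput carrying metadata, upload, and delete_local

# register process_result so click invokes it with the subcommand's CreateOutput
create.result_callback()(click.pass_context(process_result))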
Example #3
def get_imageset_names() -> list:
    """Retrieves the names of all available imagesets in bucket pointed to by global config.

    Returns:
        list: imageset names
    """
    config = get_config()
    return list_top_level_bucket_prefixes(config[BUCKET_FIELD])
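BUCKET_FIELD is a module-level constant naming which config entry holds the imageset bucket; a hedged usage sketch (the constant's value is an assumption, matching the config key used in Example #7):

BUCKET_FIELD = 'image_bucket_name'  # assumed constant for the imageset module

for name in get_imageset_names():
    print(name)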
Example #4
def upload_dict_to_s3_as_json(s3_path: str, obj: dict):
    """Uploads given dictionary to model bucket on S3.

    Args:
        s3_path (str): full s3 path to save dictionary to, (no .json)
        obj (dict): dictionary to save
    """
    S3 = boto3.resource('s3')
    config = get_config()
    model_bucket = S3.Bucket(config['model_bucket_name'])
    model_bucket.put_object(Body=json.dumps(obj, indent=2), Key=s3_path + '.json')
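For example (path and payload hypothetical), this writes a pretty-printed JSON object under the configured model bucket:

upload_dict_to_s3_as_json('runs/run_42/metrics',
                          {'loss': 0.08, 'epochs': 30})
# resulting key: runs/run_42/metrics.json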
Example #5
def _ensure_dataset(name: str):
    """Ensures dataset exists.

    Args:
        name (str): name of dataset
        
    Raises:
        ValueError: if dataset name is invalid (no matching objects in S3 bucket)
    """
    config = get_config()
    if not download_prefix(config[BUCKET_FIELD], name, dataset_cache):
        raise ValueError(name)
Example #6
def upload_file_to_s3(prefix: str, file_path: Path, alternate_name=None):
    """Uploads file at given file path to model bucket on S3.

    Args:
        prefix (str): prefix for filename on S3
        file_path (Path): path to file
        alternate_name (str, optional): name to override local file name
    """
    S3 = boto3.resource('s3')
    config = get_config()
    model_bucket = S3.Bucket(config['model_bucket_name'])
    filename = file_path.name if alternate_name is None else alternate_name
    upload_path = prefix + '/' + filename
    model_bucket.upload_file(str(file_path), upload_path)
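A quick hedged usage sketch (prefix and file names hypothetical) showing the effect of alternate_name:

upload_file_to_s3('runs/run_42', Path('weights.h5'))
# uploads to runs/run_42/weights.h5

upload_file_to_s3('runs/run_42', Path('weights.h5'), alternate_name='best.h5')
# uploads to runs/run_42/best.h5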
Example #7
    def download_imagesets(self, imageset_list):
        """Util for downloading all imagesets needed for imageset creation.

        Args:
            imageset_list (list): list of imageset names needed
        """
        # Get image bucket name
        bucketConfig = get_config()
        image_bucket_name = bucketConfig.get('image_bucket_name')
        # Downloads each imageset and appends local path to 'self.imageset_paths'
        for imageset in imageset_list:
            imageset_path = 'imagesets/'
            self.imageset_cache.ensure_subpath_exists(imageset_path)
            download_prefix(image_bucket_name, imageset, self.imageset_cache,
                            imageset_path)
            self.imageset_paths.append(self.imageset_cache.path / 'imagesets' /
                                       imageset)
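The method assumes its owner provides an imageset_cache and an imageset_paths list; a minimal sketch of those assumed attributes (class name hypothetical):

class ImagesetClient:  # hypothetical owner of download_imagesets()
    def __init__(self, imageset_cache):
        # cache object exposing path and ensure_subpath_exists(), as used above
        self.imageset_cache = imageset_cache
        # filled with local Paths by download_imagesets()
        self.imageset_paths = []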
Example #8
def _ensure_metadata(name: str):
    """Ensure imageset metadata exists.
    NOTE: This function works around the fact that we don't have
    imageset wide metadata files for all imagesets. In that case, it
    picks a random metadata file and reports the fields contained within.

    Args:
        name (str): name of imageset
        
    Raises:
        ClientError: If the given imageset name does not exist in the S3 bucket.
        StopIteration: If the given imageset does not have any metadata files named according to the standard scheme.
    """
    S3 = boto3.resource('s3')
    config = get_config()
    image_bucket = S3.Bucket(config[BUCKET_FIELD])
    # relative path inside the cache where metadata will go
    cache_metadata_path = Path(name) / 'metadata.json'
    if not imageset_cache.subpath_exists(cache_metadata_path):
        imageset_cache.ensure_subpath_exists(name)
        # attempt to download set-wide metadata.json
        imageset_bucket_metadata_key = f'{name}/metadata.json'
        metadata_download_absolute_path = imageset_cache.path / cache_metadata_path
        try:
            # attempt to grab imageset-wide metadata
            image_bucket.download_file(imageset_bucket_metadata_key,
                                       str(metadata_download_absolute_path))
        except ClientError:
            # fallback to grabbing a single image metadata file (better than nothing)
            prefix = f'{name}/meta_'
            # get all items in bucket with this prefix, but limit results to 1
            image_metadata_key_collection = image_bucket.objects.filter(
                Delimiter='/', Prefix=prefix).limit(1)
            # filter() returns a collection iterable, which we must convert to an iterator (generator) and call next on
            try:
                image_metadata_key = next(
                    iter(image_metadata_key_collection)).key
                image_bucket.download_file(
                    image_metadata_key, str(metadata_download_absolute_path))
            # explicitly reraise these errors for verbosity
            except ClientError:
                raise
            except StopIteration:
                raise
Example #9
def test_update_no_existing_config_with_options():
    """Basically the same as test_update, except we clean the existing configuration
    file away before calling it. Ensures the command properly creates a new config file.
    """
    conf = get_config()
    test_cache.clean()
    result = runner.invoke(config_cmd_group, [
        'update', '-d', 'test_data_buck', '-m', 'test_art_buck', '-i',
        'test_img_buck'
    ])
    assert result.exit_code == 0
    with open(test_data_dir / Path('config_update_contents.txt'),
              'r') as myfile:
        data = myfile.read()
    with open(test_cache.path / Path('config.yml'), 'r') as myfile:
        config_contents = myfile.read()
    assert config_contents == data
    update_config(conf)
Example #10
def list_top_level_bucket_prefixes(bucket_name: str):
    """Lists all top level prefixes in an S3 bucket.
    
    A top level prefix is the first element in an object's key chain; no
    subprefixes are listed. E.g., if the bucket contains an object with key
    a/b/c/d.json, this function lists only a.
    
    Args:
        bucket_name (str): name of S3 bucket
        
    Returns:
        list: prefix strings
    """
    S3 = boto3.resource('s3')
    bucket_contents = S3.meta.client.list_objects(Bucket=bucket_name, Delimiter='/')
    contents = []
    if bucket_contents.get('CommonPrefixes') is not None:
        for obj in bucket_contents.get('CommonPrefixes'):
            contents.append(obj.get('Prefix')[:-1])
    return contents
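Note that list_objects returns at most 1,000 keys per response. For larger buckets, a paginator-based variant (a sketch, same output) is safer:

def list_top_level_bucket_prefixes_paginated(bucket_name: str):
    """Same as above, but robust to buckets with more than 1,000 keys."""
    client = boto3.client('s3')
    paginator = client.get_paginator('list_objects_v2')
    contents = []
    for page in paginator.paginate(Bucket=bucket_name, Delimiter='/'):
        for obj in page.get('CommonPrefixes', []):
            contents.append(obj['Prefix'][:-1])  # strip trailing '/'
    return contents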
Example #11
def clean(all: bool):
    """ Cleans locally saved ravenml cache files.

    Args:
        all (bool): whether to clean all files from the cache, including the
            configuration YAML. Defaults to False.
    """
    if all:
        if not cache.clean():
            click.echo(Fore.RED + 'No cache to clean.')
    else:
        try:
            config = get_config()
            cache.clean()
            update_config(config)
        except ValueError:
            click.echo(Fore.RED +
                       'Bad configuration file. Deleting alongside cache.')
            cache.clean()
        except FileNotFoundError:
            if not cache.clean():
                click.echo(Fore.RED + 'No cache to clean.')
Example #12
def _ensure_metadata(name: str):
    """Ensure dataset metadata exists.

    Args:
        name (str): name of dataset
        
    Raises:
        ValueError: if dataset name is invalid and metadata cannot be downloaded.
    """
    S3 = boto3.resource('s3')
    config = get_config()
    DATASET_BUCKET = S3.Bucket(config['dataset_bucket_name'])
    metadata_path = Path(name) / 'metadata.json'
    if not dataset_cache.subpath_exists(metadata_path):
        dataset_cache.ensure_subpath_exists(name)
        metadata_key = f'{name}/metadata.json'
        metadata_absolute_path = dataset_cache.path / metadata_path
        try:
            DATASET_BUCKET.download_file(metadata_key,
                                         str(metadata_absolute_path))
        except ClientError as e:
            raise ValueError(name) from e
Example #13
def show(ctx: click.Context):
    """Show current config.
    
    Args:
        ctx (Context): click context object
    """
    try:
        # try and load the configuration
        config = get_config()
        for key, value in config.items():
            print(Fore.GREEN + key + ': ' + Fore.WHITE + value)
    except FileNotFoundError:
        # thrown when no configuration file is found
        click.echo(Fore.RED + 'No configuration found.')
        if user_confirms('Would you like to make a new configuration?',
                         default=True):
            ctx.obj['from_show'] = True
            ctx.invoke(update)
    except ValueError:
        # thrown when current configuration file is invalid
        click.echo(Fore.RED + 'Current configuration file is invalid.')
        if user_confirms('Would you like to fix it?', default=True):
            ctx.obj['from_show'] = True
            ctx.invoke(update)
Example #14
def update(ctx: click.Context, image_bucket: str, dataset_bucket: str,
           model_bucket: str):
    """Update current config.
    
    Args:
        ctx (Context): click context object
        image_bucket (str): image bucket name. None if not in no-user mode
        dataset_bucket (str): dataset bucket name. None if not in no-user mode
        model_bucket (str): model bucket name. None if not in no-user mode
    """
    config = {}
    load_result = 0
    try:
        # try and load the configuration
        config = get_config()
    except FileNotFoundError:
        # thrown when no configuration file is found
        # checks to see if flag is set to indicate we arrived here from show
        if not ctx.obj['from_show']:
            click.echo(
                Fore.RED +
                'No configuration found to update. A new one will be created.')
        load_result = 1
    except ValueError:
        # thrown when the current configuration file is invalid
        # checks to see if flag is set to indicate we arrived here from show
        if not ctx.obj['from_show']:
            click.echo(
                Fore.RED +
                'Current configuration is invalid. A new one will be created.')
        load_result = 2

    # map options into dict
    user_options = {
        'image_bucket_name': image_bucket,
        'dataset_bucket_name': dataset_bucket,
        'model_bucket_name': model_bucket,
    }

    # if options were passed, just update those options
    if image_bucket or dataset_bucket or model_bucket:
        # only use options if the existing config was valid or a new config is being created
        if load_result in (0, 1):
            for field, value in user_options.items():
                if value:
                    config[field] = value
        else:
            click.echo(
                Fore.RED +
                "Passed options ignored since current configuration is invalid."
            )
    # otherwise iterate over each field
    else:
        for field in CONFIG_FIELDS:
            # only make field editing optional if existing config was loaded successfully
            if load_result == 0:
                if user_confirms('Edit ' + field + '?'):
                    config[field] = user_input(field + ':',
                                               default=config[field])
            else:
                config[field] = user_input(field + ':')

    # save updates
    update_config(config)
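CONFIG_FIELDS is defined elsewhere; judging from the user_options dict above and the three options exercised in the test of Example #9, it presumably covers the same three bucket names. A hedged guess:

# assumed module-level constant iterated by update(); matches user_options above
CONFIG_FIELDS = ['image_bucket_name', 'dataset_bucket_name', 'model_bucket_name']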