Esempio n. 1
0
def fetch_data_buckets_from_config(config_file="config.properties", data_section="data",
                                   required_bucket_dir_name='morf-data/'):
    """
    Fetch the buckets from data_section of config_file; fail if any path's key does not exactly match required_bucket_dir_name.

    :param config_file: path to config file.
    :param data_section: section of config file with key-value pairs representing institution names and s3 paths.
    :param required_bucket_dir_name: directory or path that the key of EVERY value in data_section must equal exactly.
    :return: tuple of buckets to iterate over, in config order; no directories are returned because these must be uniform across all of the buckets.
    :raises RuntimeError: if the key extracted from any value differs from required_bucket_dir_name.
    :raises AssertionError: if data_section yields no buckets.
    """
    cf = configparser.ConfigParser()
    cf.read(config_file)
    buckets = []
    for _name, item_url in cf.items(data_section):
        bucket = get_bucket_from_url(item_url)
        key = get_key_from_url(item_url)
        if key != required_bucket_dir_name:
            msg = "[ERROR]: specified path {} does not match required directory name {}; change name of directories to be consistent or specify the correct directory to check for.".format(
                item_url, required_bucket_dir_name)
            print(msg)
            # A bare `raise` here has no exception of its own to re-raise: it yields a
            # message-less RuntimeError, or re-raises whatever unrelated exception the
            # caller happens to be handling. Raise explicitly, keeping the same type.
            raise RuntimeError(msg)
        buckets.append(bucket)
    if not buckets:
        # Raised explicitly (rather than via `assert`) so the check survives `python -O`;
        # AssertionError is kept so existing handlers still match.
        raise AssertionError(
            "no buckets found in section {!r} of {}".format(data_section, config_file))
    return tuple(buckets)
Esempio n. 2
0
def test_get_bucket_from_url():
    """Check that get_bucket_from_url returns the segment right after the s3:// scheme."""
    cases = (
        ("s3://my-bucket/some/file.txt", "my-bucket"),
        ("s3://anotherbucket/some/file.txt", "anotherbucket"),
    )
    for url, expected_bucket in cases:
        assert get_bucket_from_url(url) == expected_bucket