Exemple #1
0
def write_dicts_to_jsonl_gz(data, s3_path):
    """
    Serialise objects as gzip-compressed JSON Lines and upload them to S3.

    Each element of ``data`` is dumped with ``json.dumps`` onto its own line;
    lines are separated by ``'\\n'`` with no trailing newline. The text is
    UTF-8 encoded, gzip-compressed and stored at the object addressed by
    ``s3_path``.

    Parameters
    ----------
    data : iterable
        JSON-serialisable objects, one per output line. An empty iterable
        produces an object whose decompressed content is empty.
    s3_path : str
        Destination path; it is split into bucket and key by
        ``s3_path_to_bucket_key``.

    Raises
    ------
    TypeError
        If an element of ``data`` cannot be serialised by ``json.dumps``.
    """
    # A single join avoids the quadratic cost of repeated ``+=`` and, unlike
    # indexing ``data[0]``, copes with empty input and arbitrary iterables.
    file_as_string = '\n'.join(json.dumps(d) for d in data)
    b, k = s3_path_to_bucket_key(s3_path)
    compressed_out = gzip.compress(file_as_string.encode('utf-8'))
    # NOTE(review): s3_resource is presumably a module-level boto3 S3
    # resource -- confirm against the rest of the file.
    s3_resource.Object(b, k).put(Body=compressed_out)
Exemple #2
0
def read_jsonl_from_s3(s3_path, encoding='utf-8', compressed=False):
    """
    Read a JSON Lines object from an S3 path and return its parsed records.

    Parameters
    ----------
    s3_path : str
        Path of the object to read; it is split into bucket and key by
        ``s3_path_to_bucket_key``.
    encoding : str
        Text encoding used to decode the object's bytes (default ``'utf-8'``).
    compressed : bool
        When true, the object's bytes are gzip-decompressed before decoding.

    Returns
    -------
    list
        One parsed JSON value per non-blank line, in file order. An object
        with no non-blank lines yields an empty list.

    Raises
    ------
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    bucket, key = s3_path_to_bucket_key(s3_path)
    # NOTE(review): s3_resource is presumably a module-level boto3 S3
    # resource -- confirm against the rest of the file.
    obj = s3_resource.Object(bucket, key)
    raw = obj.get()['Body'].read()

    if compressed:
        raw = gzip.decompress(raw)

    # JSON Lines files normally end with a newline, so splitting on '\n'
    # leaves a trailing empty string (and an empty object yields ['']).
    # json.loads('') raises, so blank lines are skipped rather than parsed.
    return [
        json.loads(line)
        for line in raw.decode(encoding).split('\n')
        if line.strip()
    ]
Exemple #3
0
    raise ValueError('bucket_key_to_s3_path FAILURE')

# bucket_key_to_s3_path is expected to build 's3://<bucket>/<key>' and to
# leave a trailing slash on the key untouched.
for _key, _expected in (
    ('some/path/', 's3://alpha-gluejobutils/some/path/'),
    ('some/path', 's3://alpha-gluejobutils/some/path'),
):
    out = s3.bucket_key_to_s3_path(bucket, _key)
    if out != _expected:
        raise ValueError('bucket_key_to_s3_path FAILURE')

print("===> bucket_key_to_s3_path ===> OK")

### ### ### ### ### ### ### ###
### s3_path_to_bucket_key ###
### ### ### ### ### ### ### ###
# s3_path_to_bucket_key is expected to split an s3 path into (bucket, key),
# for a file path, a bare prefix, and a prefix with a trailing slash.
for _path, _want_bucket, _want_key in (
    ('s3://alpha-gluejobutils/testing/data/diamonds_csv/diamonds.csv',
     'alpha-gluejobutils', 'testing/data/diamonds_csv/diamonds.csv'),
    ('s3://alpha-gluejobutils/testing/data',
     'alpha-gluejobutils', 'testing/data'),
    ('s3://alpha-gluejobutils/testing/data/',
     'alpha-gluejobutils', 'testing/data/'),
):
    b, o = s3.s3_path_to_bucket_key(_path)
    if b != _want_bucket or o != _want_key:
        raise ValueError('s3_path_to_bucket_key FAILURE')
print("===> s3_path_to_bucket_key ===> OK")

### ### ### ### ### ### ###
### read_json_from_s3 ###
### ### ### ### ### ### ###