Python sort_s3_files_by_date_str 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: emmaa.util

메소드/함수: sort_s3_files_by_date_str

hotexamples.com에서의 예제들: 2

Python sort_s3_files_by_date_str - 2개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python emmaa.util.sort_s3_files_by_date_str의 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: test_s3.py 프로젝트: indralab/emmaa

def test_util_find_on_s3_functions():
    """Check the S3 file-listing helpers of ``emmaa.util`` on the test bucket.

    Covers listing by prefix, filtering by extension, and the latest /
    n-th latest / count helpers, using two different key prefixes.
    """
    # Import inside the test body, as recommended when using moto
    from emmaa.util import (
        find_latest_s3_file,
        find_nth_latest_s3_file,
        find_number_of_files_on_s3,
        sort_s3_files_by_date_str,
    )

    folder_prefix = 'results/test/'
    results_prefix = 'results/test/results_'
    json_ext = '.json'

    # The bucket gets an mm (pkl) file and a results (json) file, both
    # stored in the results folder
    s3_client = setup_bucket(add_mm=True, add_results=True)

    # Folder prefix without an extension filter returns both files
    all_files = sort_s3_files_by_date_str(TEST_BUCKET_NAME, folder_prefix)
    assert len(all_files) == 2

    # Restricting to a specific extension leaves one file
    json_files = sort_s3_files_by_date_str(
        TEST_BUCKET_NAME, folder_prefix, json_ext)
    assert len(json_files) == 1

    # A longer prefix also narrows the listing to one file
    results_files = sort_s3_files_by_date_str(
        TEST_BUCKET_NAME, results_prefix)
    assert len(results_files) == 1

    # Latest / n-th latest lookups: index 1 only resolves when at least
    # two files match the prefix
    assert find_latest_s3_file(TEST_BUCKET_NAME, results_prefix)
    assert not find_nth_latest_s3_file(1, TEST_BUCKET_NAME, results_prefix)
    assert find_nth_latest_s3_file(1, TEST_BUCKET_NAME, folder_prefix)

    # Counts must agree with the listings above
    assert find_number_of_files_on_s3(TEST_BUCKET_NAME, folder_prefix) == 2
    assert find_number_of_files_on_s3(TEST_BUCKET_NAME, results_prefix) == 1
    assert find_number_of_files_on_s3(
        TEST_BUCKET_NAME, folder_prefix, json_ext) == 1

예제 #2

파일 보기

파일: manager.py 프로젝트: bgyori/emmaa

 def add_model_from_s3(self, model_id, config=None, number_of_updates=3,
                       bucket=EMMAA_BUCKET_NAME):
     """Add statements and path counts for one model from its S3 files.

     Parameters
     ----------
     model_id :
         Identifier of the model; used to build the S3 key prefixes.
     config :
         Model configuration. When falsy, it is loaded from S3 with
         ``load_config_from_s3(model_id)``. Must provide
         ``config['test']['test_corpus']`` as a string or an iterable of
         strings.
     number_of_updates :
         How many statement files to process, taken from the front of the
         list returned by ``sort_s3_files_by_date_str``.
     bucket :
         Name of the S3 bucket to read from.

     Raises
     ------
     ClientError
         Re-raised for any error code other than ``NoSuchKey`` raised
         while loading or applying a results file.
     """
     # Any falsy config (not only None) triggers a load from S3.
     config = config or load_config_from_s3(model_id)
     corpora = config['test']['test_corpus']
     if isinstance(corpora, str):
         # A single corpus name is handled like a one-element list.
         corpora = [corpora]

     prefix = f'assembled/{model_id}/statements_'
     sorted_keys = sort_s3_files_by_date_str(bucket, prefix, '.gz')
     for stmt_key in sorted_keys[:number_of_updates]:
         date = strip_out_date(stmt_key, 'date')
         dt = strip_out_date(stmt_key, 'datetime')
         # Statements go in first ...
         self.add_statements(
             model_id, date, load_gzip_json_from_s3(bucket, stmt_key))
         # ... then the path counts from each test corpus are applied.
         for test_corpus in corpora:
             key = f'results/{model_id}/results_{test_corpus}_{dt}.json'
             try:
                 first_result = load_json_from_s3(bucket, key)[0]
                 counts = first_result.get('path_stmt_counts')
                 if counts:
                     self.update_statements_path_counts(
                         model_id, date, counts)
             except ClientError as err:
                 # Only a missing results file is tolerated; anything else
                 # propagates to the caller.
                 if err.response['Error']['Code'] != 'NoSuchKey':
                     raise err
                 logger.warning(f'No results file for {key}, skipping')