コード例 #1
0
ファイル: test_s3.py プロジェクト: indralab/emmaa
def test_util_find_on_s3_functions():
    """Check the S3 listing/lookup helpers of ``emmaa.util`` on a test bucket.

    Per the setup below, the bucket holds a model manager pickle and a
    results JSON, both stored under ``results/test/``.
    """
    # Local imports are recommended when using moto
    from emmaa.util import (
        find_latest_s3_file,
        find_nth_latest_s3_file,
        find_number_of_files_on_s3,
        sort_s3_files_by_date_str,
    )
    folder_prefix = 'results/test/'
    results_prefix = 'results/test/results_'

    # Bucket has mm (pkl) and results (json) files, both in results folder
    _client = setup_bucket(add_mm=True, add_results=True)

    # No extension filter: both files are listed
    all_keys = sort_s3_files_by_date_str(TEST_BUCKET_NAME, folder_prefix)
    assert len(all_keys) == 2

    # Restricting to a specific extension keeps only the JSON file
    json_keys = sort_s3_files_by_date_str(
        TEST_BUCKET_NAME, folder_prefix, '.json')
    assert len(json_keys) == 1

    # A longer prefix narrows the listing to the results file
    results_keys = sort_s3_files_by_date_str(TEST_BUCKET_NAME, results_prefix)
    assert len(results_keys) == 1

    # Latest file exists for the narrow prefix, but there is no second one
    assert find_latest_s3_file(TEST_BUCKET_NAME, results_prefix)
    assert not find_nth_latest_s3_file(1, TEST_BUCKET_NAME, results_prefix)
    # The folder-wide prefix has two files, so index 1 resolves
    assert find_nth_latest_s3_file(1, TEST_BUCKET_NAME, folder_prefix)

    # Counts must agree with the listings above
    assert find_number_of_files_on_s3(TEST_BUCKET_NAME, folder_prefix) == 2
    assert find_number_of_files_on_s3(TEST_BUCKET_NAME, results_prefix) == 1
    assert find_number_of_files_on_s3(
        TEST_BUCKET_NAME, folder_prefix, '.json') == 1
コード例 #2
0
 def _get_previous_json_stats(self):
     """Load the most recent earlier statistics JSON for this model from S3.

     Looks under ``model_stats/<model_name>/model_stats_*.json`` in
     ``self.bucket``. If the newest file belongs to the current round
     (its date equals ``self.latest_round.date_str``), the second newest
     file is used instead.

     As a side effect, ``self.previous_date_str`` is set to the date
     stripped from the chosen key; it is left untouched when no earlier
     statistics are found.

     Returns
     -------
     previous_json_stats or None
         Whatever ``load_json_from_s3`` returns for the chosen key, or
         None when there are no earlier statistics to load.
     """
     prefix = f'model_stats/{self.model_name}/model_stats_'
     key = find_latest_s3_file(self.bucket, prefix, '.json')
     # This is the first time statistics is generated for this model
     if key is None:
         logger.info('Could not find a key to the previous statistics ')
         return None
     # If stats for this date exists, previous stats is the second latest
     if strip_out_date(key) == self.latest_round.date_str:
         logger.info('Statistics for latest round already exists')
         key = find_nth_latest_s3_file(1, self.bucket, prefix, '.json')
         # The only stored statistics are those of the latest round, so
         # there is nothing earlier to load; bail out instead of passing
         # an empty key on to strip_out_date / load_json_from_s3.
         if not key:
             logger.info('No statistics earlier than the latest round '
                         'were found')
             return None
     # Store the date string to find previous round with it
     self.previous_date_str = strip_out_date(key)
     logger.info(f'Loading earlier statistics from {key}')
     previous_json_stats = load_json_from_s3(self.bucket, key)
     return previous_json_stats
コード例 #3
0
ファイル: model.py プロジェクト: kolusask/emmaa
def last_updated_date(model,
                      file_type='model',
                      date_format='date',
                      tests='large_corpus_tests',
                      extension='.pkl',
                      n=0,
                      bucket=EMMAA_BUCKET_NAME):
    """Find the most recent or the nth file of given type on S3 and return its
    creation date.

    Example file name:
    models/aml/model_2018-12-13-18-11-54.pkl

    The file is first looked up under the current key prefix for the given
    file type; if that lookup fails, the alternative ("old") prefix is
    tried before giving up.

    Parameters
    ----------
    model : str
        Model name to look for
    file_type : str
        Type of a file to find the latest file for. Accepted values: 'model',
        'test_results', 'model_stats', 'test_stats'.
    date_format : str
        Format of the returned date. Accepted values are 'datetime' (returns a
        date in the format "YYYY-MM-DD-HH-mm-ss") and 'date' (returns a date
        in the format "YYYY-MM-DD"). Default is 'date'.
    tests : str
        Name of the test corpus; it is appended to the key prefix when
        file_type is 'test_results' or 'test_stats'. Default is
        'large_corpus_tests'.
    extension : str
        The extension the model file needs to have. Default is '.pkl'
    n : int
        Index of the file in list of S3 files sorted by date (0-indexed).
    bucket : str
        Name of bucket on S3.

    Returns
    -------
    last_updated : str
        A string of the selected format, or an empty string if no date
        could be determined under either prefix.

    Raises
    ------
    TypeError
        If `file_type` is not one of the accepted values.
    """
    if file_type == 'model':
        prefix_new = prefix_old = f'models/{model}/model_'
    elif file_type == 'test_results':
        prefix_new = f'results/{model}/results_{tests}'
        prefix_old = f'results/{model}/results_'
    elif file_type == 'model_stats':
        prefix_new = f'model_stats/{model}/model_stats_'
        prefix_old = f'stats/{model}/stats_'
    elif file_type == 'test_stats':
        prefix_new = f'stats/{model}/test_stats_{tests}'
        prefix_old = f'stats/{model}/stats_'
    else:
        raise TypeError(f'Files of type {file_type} are not supported')

    # dict.fromkeys keeps the new-then-old order while dropping the duplicate
    # when both prefixes are the same, so the same lookup is not repeated.
    for prefix in dict.fromkeys((prefix_new, prefix_old)):
        try:
            key = find_nth_latest_s3_file(n=n,
                                          bucket=bucket,
                                          prefix=prefix,
                                          extension=extension)
            return strip_out_date(key, date_format=date_format)
        except TypeError:
            # Presumably raised when no matching key was found for this
            # prefix; fall through to the next candidate.
            continue
    logger.info('Could not find latest update date')
    return ''