def _get_collection_files(self) -> list:
    """Return the first element of the S3 listing for the collections prefix.

    The listing is requested for ``self.bucket_name`` under the key derived
    from ``self.prefix_name`` via ``get_path_to_collections``, with no
    start-after key and an empty delimiter.
    """
    bucket = self.bucket_name
    collections_prefix = get_path_to_collections(self.prefix_name)
    listing = list_s3_objects(
        bucket, collections_prefix, start_after_key=None, delimiter=""
    )
    # list_s3_objects yields a pair; only its first element is returned.
    found_files, _ignored = listing
    return found_files
def test_manager_export_load(out_dir):
    """Export a populated CollectionManager to ``out_dir`` and check that the
    manager loaded back from the exported file compares equal to it."""
    export_name = "cm.json"

    original = CollectionManager()
    original.get("default").include("loss")

    tensor = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
    # The reduced value is never used; the call is retained so the same
    # TensorFlow ops are created as before.
    _max_value = tf.math.reduce_max(tensor)
    original.get("default").add_tensor(tensor)

    # One collection is added as an object, the other by name.
    original.add(Collection("trial1"))
    original.add("trial2")
    original.get("trial2").include("total_loss")

    original.export(out_dir, export_name)
    exported_path = os.path.join(get_path_to_collections(out_dir), export_name)
    reloaded = CollectionManager.load(exported_path)
    assert original == reloaded
def test_manager_export_load():
    """Export a CollectionManager under /tmp/dummy_trial and check that the
    manager loaded back from the default collections file compares equal."""
    trial_dir = "/tmp/dummy_trial"

    source = CollectionManager()
    source.create_collection("default")
    source.get("default").include("loss")
    # One collection is added as an object, the other by name.
    source.add(Collection("trial1"))
    source.add("trial2")
    source.get("trial2").include("total_loss")
    source.export(trial_dir, DEFAULT_COLLECTIONS_FILE_NAME)

    collections_dir = get_path_to_collections(trial_dir)
    exported_path = os.path.join(collections_dir, DEFAULT_COLLECTIONS_FILE_NAME)
    restored = CollectionManager.load(exported_path)
    assert source == restored
def test_parse_worker_name_from_collection_file():
    """Check worker-name parsing for a collection file listed from S3 and for
    a hand-written local collection file path."""
    s3_location = "s3://smdebug-testing/resources/one-index-file"
    _, bucket, prefix = is_s3(s3_location)

    listed, _ = list_s3_objects(bucket, get_path_to_collections(prefix))
    assert len(listed) == 1

    # Name parsed from the single listed S3 collection file.
    parsed = get_worker_name_from_collection_file(listed[0])
    assert parsed == "/job:worker/replica:0/task:1/device:GPU:0"

    # Name parsed from a local-style path.
    local_file = "/tmp/collections/000000000/job-worker_1_collections.json"
    parsed = get_worker_name_from_collection_file(local_file)
    assert parsed == "job-worker_1"
Example #5
0
def helper_test_only_w_g(trial_dir, hook):
    """Run ``simple_model`` with ``hook`` and check what was written to ``trial_dir``.

    Verifies that the default collections file exists, that the collections
    loaded from it equal the hook's collections, that the "weights",
    "gradients" and "default" collections hold two tensor names in total,
    and that the events directory has five entries.
    """
    simple_model(hook)
    events_dir = os.path.join(trial_dir, "events")
    steps, _ = get_dirs_files(events_dir)

    written_files = get_collection_files(trial_dir)
    assert DEFAULT_COLLECTIONS_FILE_NAME in written_files

    collections_path = join(
        get_path_to_collections(trial_dir), DEFAULT_COLLECTIONS_FILE_NAME
    )
    loaded = CollectionManager.load(collections_path)
    assert hook.get_collections() == loaded.collections

    # Total tensor names across the three collections of interest.
    tensor_name_total = sum(
        len(loaded.collections[name].tensor_names)
        for name in ("weights", "gradients", "default")
    )
    assert tensor_name_total == 2
    assert len(steps) == 5
def helper_test_simple_include_regex(trial_dir, hook):
    """Run ``simple_model`` for 10 steps and check the tensors saved per step.

    Args:
        trial_dir: Directory the hook writes into; its ``events``
            sub-directory and collections file are inspected.
        hook: Hook object passed through to ``simple_model``.

    Asserts that the loaded "default" collection has exactly one tensor
    name, that the events directory has five entries, and that each entry
    contains exactly one tensor record whose data totals 4 bytes.
    """
    simple_model(hook, steps=10)
    steps, _ = get_dirs_files(os.path.join(trial_dir, "events"))

    cm = CollectionManager.load(
        join(get_path_to_collections(trial_dir), DEFAULT_COLLECTIONS_FILE_NAME)
    )
    assert len(cm.collections["default"].tensor_names) == 1
    assert len(steps) == 5

    for step_dir in steps:
        tensor_count = 0
        total_bytes = 0
        event_files = glob.glob(
            join(trial_dir, "events", step_dir, "**", "*.tfevents"), recursive=True
        )
        for event_file in event_files:
            reader = FileReader(event_file)
            for record in reader.read_tensors():
                # Each record is a 5-tuple; only the data element is used.
                # Distinct names keep the outer loop variable from being
                # rebound by this unpacking.
                _name, _record_step, tensor_data, _mode, _mode_step = record
                tensor_count += 1
                if tensor_data is not None:
                    total_bytes += tensor_data.nbytes
        assert tensor_count == 1
        assert total_bytes == 4
 def _get_collection_files(self) -> list:
     """Return what ``list_files_in_directory`` reports for the collections
     directory derived from ``self.path``."""
     collections_dir = get_path_to_collections(self.path)
     return list_files_in_directory(collections_dir)
Example #8
0
def get_collection_files(path):
    """Return the names of entries in the collections directory of ``path``
    for which ``isfile`` is true.

    Only names are returned, not full paths. The directory is listed once
    and is not searched recursively.
    """
    collections_dir = get_path_to_collections(path)
    file_names = []
    for entry in os.listdir(collections_dir):
        if isfile(join(collections_dir, entry)):
            file_names.append(entry)
    return file_names