# Example no. 1
def test_create_training_dataset(dataset_name, settings):

    """Build the training dataset `dataset_name` and print its statistics."""

    dataset_dir = annotator_utils.get_dataset_dir_path(
        settings.clip_type, dataset_name)

    training_dataset = dataset_utils.create_training_dataset(
        dataset_dir, settings)

    show_training_dataset_stats(training_dataset)
# Example no. 2
def test_create_waveform_dataset_from_tfrecord_files(dataset_name, settings):

    """Create a waveform dataset from TFRecord files and print its stats."""

    dataset_dir = annotator_utils.get_dataset_dir_path(
        settings.clip_type, dataset_name)

    waveforms = dataset_utils.create_waveform_dataset_from_tfrecord_files(
        dataset_dir)

    show_waveform_dataset_stats(waveforms, settings.waveform_sample_rate)
# Example no. 3
def evaluate_annotator(training_name, epoch_num):

    """
    Evaluate a trained annotator on the validation dataset.

    Runs inference to get call bounds, tallies the differences between
    inferred and dataset start/end indices, and shows and plots the
    resulting difference counts.
    """

    _, settings = annotator_utils.load_model_and_settings(
        training_name, epoch_num)

    validation_dir = annotator_utils.get_dataset_dir_path(
        settings.clip_type, 'Validation')

    dataset = dataset_utils.create_validation_dataset(
        validation_dir, settings).take(settings.validation_step_count)

    inferrer = Inferrer((training_name, epoch_num))

    sample_rate = settings.waveform_sample_rate

    start_diff_counts = defaultdict(int)
    end_diff_counts = defaultdict(int)

    for inferred_start, inferred_end, actual_start, actual_end \
            in inferrer.get_call_bounds(dataset):

        # Dataset indices arrive as TF tensors; unwrap to plain numbers.
        actual_start = actual_start.numpy()
        actual_end = actual_end.numpy()

        start_diff = _get_diff(inferred_start, actual_start, sample_rate)
        end_diff = _get_diff(inferred_end, actual_end, sample_rate)

        # NOTE(review): only `start_diff` is checked for None, so a None
        # `end_diff` would be tallied under the key None — confirm that
        # `_get_diff` returns None for both bounds or neither.
        if start_diff is not None:
            start_diff_counts[start_diff] += 1
            end_diff_counts[end_diff] += 1

    _show_diff_counts('Start', start_diff_counts, settings)
    _show_diff_counts('End', end_diff_counts, settings)

    _plot_diff_counts(training_name, epoch_num, start_diff_counts,
                      end_diff_counts, settings)
# Example no. 4
def show_dataset_sizes(settings):

    """
    Print per-file and total example counts for the training and
    validation TFRecord datasets of `settings.clip_type`.
    """

    from tensorflow.data import TFRecordDataset

    for dataset_name in ('Training', 'Validation'):

        print(f'Sizes of files in dataset "{dataset_name}":')

        dataset_dir = annotator_utils.get_dataset_dir_path(
            settings.clip_type, dataset_name)

        total_size = 0

        for file_path in sorted(dataset_dir.glob('*.tfrecords')):
            # Count examples by iterating the file's records once.
            size = sum(1 for _ in TFRecordDataset([str(file_path)]))
            print(f'    {file_path.name}: {size}')
            total_size += size

        print(f'Total size of dataset "{dataset_name}": {total_size}')
# Example no. 5
def get_dataset(name, settings):
    """Return the training dataset named `name` for `settings.clip_type`."""
    return dataset_utils.create_training_dataset(
        annotator_utils.get_dataset_dir_path(settings.clip_type, name),
        settings)