Ejemplo n.º 1
0
    def __init__(self,
                 corpus_or_utt_ids,
                 feature_containers,
                 partition_size,
                 shuffle=True,
                 seed=None):
        """
        Store the configuration, validate the containers and call ``reload``.

        Args:
            corpus_or_utt_ids: Either an ``audiomate.Corpus``, in which case
                the keys of its ``utterances`` are copied into a list and
                used as utterance ids, or any other object, which is stored
                as the utterance ids unchanged.
            feature_containers: Either a single ``containers.Container``
                (wrapped into a one-element list) or any other object,
                which is stored as the collection of containers unchanged.
            partition_size: Value handed to ``units.parse_storage_size``;
                the parsed result is stored as ``self.partition_size``.
            shuffle: Stored as ``self.shuffle``.
            seed: Seed applied to the instance's own ``random.Random``.

        Raises:
            ValueError: If the collection of containers is empty.
        """
        given_corpus = isinstance(corpus_or_utt_ids, audiomate.Corpus)
        self.utt_ids = (list(corpus_or_utt_ids.utterances.keys())
                        if given_corpus else corpus_or_utt_ids)

        is_single = isinstance(feature_containers, containers.Container)
        self.containers = ([feature_containers]
                           if is_single else feature_containers)

        if len(self.containers) < 1:
            raise ValueError('At least one container has to be provided!')

        self.partitions = []
        self.partition_size = units.parse_storage_size(partition_size)
        self.shuffle = shuffle

        # Dedicated generator instance, seeded explicitly with ``seed``.
        rng = random.Random()
        rng.seed(a=seed)
        self.rand = rng

        # Consistency check; runs before sizes and lengths are collected.
        self._raise_error_if_container_is_missing_an_utterance()

        # Per-utterance sizes and lengths, gathered once up front.
        self.utt_sizes = self._scan()
        self.utt_lengths = self._get_all_lengths()

        self.reload()
Ejemplo n.º 2
0
    def __init__(self,
                 hdf5file,
                 partition_size,
                 shuffle=True,
                 seed=None,
                 includes=None,
                 excludes=None):
        """
        Store the configuration, select the data sets and call ``_partition``.

        Args:
            hdf5file: Object whose ``keys()`` are offered to
                ``_filter_data_sets``; stored as ``self._file``.
            partition_size: Value handed to ``units.parse_storage_size``;
                the parsed result is stored as ``self._partition_size``.
            shuffle: If truthy, the selected data sets are shuffled in place
                using the generator returned by ``_random_state(seed)``.
            seed: Stored as ``self._seed`` and passed to ``_random_state``.
            includes: Forwarded to ``_filter_data_sets``.
            excludes: Forwarded to ``_filter_data_sets``.
        """
        self._file = hdf5file
        self._partition_size = units.parse_storage_size(partition_size)
        self._shuffle = shuffle
        self._seed = seed

        selected = self._filter_data_sets(
            hdf5file.keys(), includes=includes, excludes=excludes)
        if shuffle:
            rng = _random_state(self._seed)
            rng.shuffle(selected)

        # Freeze the (possibly shuffled) order for the instance's lifetime.
        self._data_sets = tuple(selected)

        # Partition bookkeeping starts out empty.
        self._partitions, self._partition_idx, self._partition_data = (
            [], 0, None)

        self._partition()
Ejemplo n.º 3
0
def test_partition_size_in_bytes():
    """The string '1024' (no unit suffix) parses to 1024."""
    parsed = units.parse_storage_size('1024')
    assert parsed == 1024
Ejemplo n.º 4
0
def test_partition_size_in_bytes_specified_as_int():
    """The integer 1024 parses to 1024."""
    parsed = units.parse_storage_size(1024)
    assert parsed == 1024
Ejemplo n.º 5
0
def test_partition_size_half_a_gibibyte():
    """The string '0.5g' parses to 512 * 1024**2."""
    expected = 512 * 1024 ** 2
    parsed = units.parse_storage_size('0.5g')
    assert parsed == expected
Ejemplo n.º 6
0
def test_partition_size_fractions_of_bytes_are_ignored():
    """The string '1.1' (no unit suffix) parses to 1."""
    parsed = units.parse_storage_size('1.1')
    assert parsed == 1
Ejemplo n.º 7
0
def test_partition_size_in_gibibytes_with_capital_g():
    """The string '2G' (upper-case suffix) parses to 2 * 1024**3."""
    expected = 2 * 1024 ** 3
    parsed = units.parse_storage_size('2G')
    assert parsed == expected
Ejemplo n.º 8
0
def test_partition_size_in_gibibytes():
    """The string '2g' (lower-case suffix) parses to 2 * 1024**3."""
    expected = 2 * 1024 ** 3
    parsed = units.parse_storage_size('2g')
    assert parsed == expected