Ejemplo n.º 1
0
def batch_sequence(stop_event,
                   queue,
                   data,
                   feature_list,
                   features2spk,
                   batch_size=128,
                   min_len=200,
                   max_len=400,
                   shuffle=True,
                   seed=0):
    """Walk through `feature_list` once, batch by batch, and push each batch onto `queue`.

    Used in KaldiDataSeqQueue. Only complete batches are produced: the trailing
    `len(feature_list) % batch_size` entries are never read.

    Args:
        stop_event: An event that is set once every batch has been queued.
        queue: The queue receiving `(features, labels)` tuples.
        data: The kaldi data directory, handed to `FeatureReader`.
        feature_list: The feature entries this process should read, in order.
            The text before the first space of an entry is the key used to
            look up its frame count.
        features2spk: A dict mapping a feature entry to its speaker index.
        batch_size: The number of entries per batch.
        min_len: Lower bound (inclusive) of the randomly drawn segment length.
        max_len: Upper bound (inclusive) of the randomly drawn segment length.
        shuffle: Forwarded to `FeatureReader.read_segment`; according to the
            original notes it selects between reading from the 0-th frame and
            reading from a random frame.
        seed: Seed for the process-local random generator.
    """
    # A process-local generator, explicitly re-seeded so that the drawn
    # segment lengths depend only on `seed`.
    rng = random.Random(os.urandom(4))
    rng.seed(seed)

    reader = FeatureReader(data)
    total_batches = len(feature_list) // batch_size
    for batch_idx in range(total_batches):
        first = batch_idx * batch_size
        entries = feature_list[first:first + batch_size]

        # Draw a target length, then clip it to the shortest utterance in the
        # batch so that every entry can supply a segment of that length.
        segment_len = rng.randint(min_len, max_len)
        shortest = min(reader.utt2num_frames[entry.split(' ')[0]]
                       for entry in entries)
        if shortest < segment_len:
            segment_len = shortest

        batch_features = np.zeros((batch_size, segment_len, reader.dim),
                                  dtype=np.float32)
        batch_labels = np.zeros((batch_size), dtype=np.int32)
        for row, entry in enumerate(entries):
            segment, _ = reader.read_segment(entry,
                                             segment_len,
                                             shuffle=shuffle)
            batch_features[row, :, :] = segment
            batch_labels[row] = features2spk[entry]
        queue.put((batch_features, batch_labels))

    # Signal that no further batches will be produced.
    stop_event.set()
    print("The process {} is about to exit.".format(os.getpid()))
    return
Ejemplo n.º 2
0
def batch_random(stop_event,
                 queue,
                 data,
                 spk2features,
                 num_total_speakers,
                 num_speakers=10,
                 num_segments=10,
                 min_len=200,
                 max_len=400,
                 shuffle=True,
                 seed=0):
    """Keep filling a queue with randomly sampled batches. Used in KaldiDataRandomQueue.

    Each batch holds `num_speakers * num_segments` segments of one common,
    randomly drawn length: `num_segments` segments for each of `num_speakers`
    sampled speakers. Batches are produced until `stop_event` is set; after
    that the function waits 3 seconds and discards whatever is left in the
    queue before returning.

    Args:
        stop_event: An event to tell the process to stop.
        queue: A queue to put the `(features, labels)` tuples in.
        data: The kaldi data directory, handed to `FeatureReader`.
        spk2features: A dict from speaker index to that speaker's feature
            entries. The text before the first space of an entry is the key
            used to look up its frame count.
        num_total_speakers: The total number of speakers.
        num_speakers: The number of speakers in the batch.
        num_segments: The number of segments per speaker.
        min_len: The minimum (inclusive) length of the features.
        max_len: The maximum (inclusive) length of the features.
        shuffle: Forwarded to `FeatureReader.read_segment`; according to the
            original notes it selects between loading from the 0-th frame and
            loading from a random frame.
        seed: The value used to seed the process-local random generator.

    Raises:
        IndexError: If a sampled speaker has no utterance longer than the
            batch length and no speaker outside the current batch is left to
            replace it with (raised by `random.Random.choice`).
    """
    # TODO: If numpy.random is ever used in this sub-process, prefer a local
    # generator, e.g. `local_state = np.random.RandomState(seed)`; re-seeding
    # is necessary in that case. `os.urandom` can be used to produce a seed
    # for the `random` module.
    rd = random.Random(os.urandom(4))
    rd.seed(seed)

    feature_reader = FeatureReader(data)
    speakers = list(spk2features.keys())
    if num_total_speakers < num_speakers:
        print(
            "[Warning] The number of available speakers are less than the required speaker. Some speakers will be duplicated."
        )
        speakers = speakers * (num_speakers // num_total_speakers + 1)
    # Now we have enough speakers.
    # Loop-invariant pool of distinct speakers, used when one must be replaced.
    speaker_pool = set(speakers)

    while not stop_event.is_set():
        # The speaker ids selected for this batch.
        batch_speakers = rd.sample(speakers, num_speakers)
        # One segment length, drawn from [min_len, max_len], shared by the
        # whole batch.
        batch_length = rd.randint(min_len, max_len)
        # (batch_size, frame_length, feat_dim)
        features = np.zeros(
            (num_speakers * num_segments, batch_length, feature_reader.dim),
            dtype=np.float32)
        # (batch_size)
        labels = np.zeros((num_speakers * num_segments), dtype=np.int32)

        for i, speaker in enumerate(batch_speakers):
            # The batch length may exceed an utterance's length, so keep only
            # utterances with strictly more frames than `batch_length`.
            spk = speaker
            while True:
                feature_list = [
                    feat for feat in spk2features[spk]
                    if feature_reader.utt2num_frames[feat.split(' ')[0]] >
                    batch_length
                ]
                if feature_list:
                    break
                # The speaker is not appropriate for this batch. Resample one
                # from the speakers not currently in the batch.
                # NOTE(review): this raises IndexError when no such speaker is
                # left, and looks like it can loop forever when none of the
                # remaining speakers has a long enough utterance -- confirm
                # that callers guarantee this cannot happen.
                spk = rd.choice(list(speaker_pool - set(batch_speakers)))
                batch_speakers[i] = spk

            labels[i * num_segments:(i + 1) * num_segments] = spk
            # If the speaker does not have enough eligible utterances,
            # replicate the existing list until it does.
            if len(feature_list) < num_segments:
                feature_list *= num_segments // len(feature_list) + 1
            # Now the length of the list must be at least the sample size:
            # pick `num_segments` entries of this speaker for the batch.
            speaker_features = rd.sample(feature_list, num_segments)
            for j, feat in enumerate(speaker_features):
                features[i * num_segments +
                         j, :, :], _ = feature_reader.read_segment(
                             feat, batch_length, shuffle=shuffle)
        queue.put((features, labels))

    # Stop was requested: wait briefly, then throw away whatever is still
    # queued (presumably so that leftover items do not keep this process from
    # exiting -- TODO confirm).
    time.sleep(3)
    while not queue.empty():
        try:
            queue.get(block=False)
        except Exception:
            # Best-effort drain: the queue may be emptied by someone else
            # between `empty()` and `get()`. `Exception` (instead of a bare
            # `except`) lets KeyboardInterrupt / SystemExit still terminate
            # the process.
            pass
    print("The process {} is about to exit.".format(os.getpid()))
    return