import os
import random
import time

import numpy as np

# FeatureReader is defined elsewhere in this codebase; it reads Kaldi features
# and exposes `dim`, `utt2num_frames` and `read_segment`.


def batch_sequence(stop_event,
                   queue,
                   data,
                   feature_list,
                   features2spk,
                   batch_size=128,
                   min_len=200,
                   max_len=400,
                   shuffle=True,
                   seed=0):
    """Load features and fill a queue. Used in KaldiDataSeqQueue.

    Args:
        stop_event: An event that is set once this process finishes reading.
        queue: A queue to put the data in.
        data: The Kaldi data directory.
        feature_list: A list showing which features this process should read.
        features2spk: A dict mapping features to speaker indices.
        batch_size: The batch size.
        min_len: The minimum length of the features.
        max_len: The maximum length of the features.
        shuffle: If True, load each feature from a random frame rather than frame 0.
        seed: The value used to generate the random seed.
    """
    # Read the comment in batch_random.
    rd = random.Random(os.urandom(4))
    rd.seed(seed)

    feature_reader = FeatureReader(data)
    num_batches = int(len(feature_list) / batch_size)
    for i in range(num_batches):
        batch_length = rd.randint(min_len, max_len)

        # In some cases the minimum length of the utterances is smaller than
        # the batch length. Use the smallest length as the real batch length.
        for j in range(batch_size):
            utt = feature_list[i * batch_size + j].split(' ')[0]
            if feature_reader.utt2num_frames[utt] < batch_length:
                batch_length = feature_reader.utt2num_frames[utt]

        features = np.zeros((batch_size, batch_length, feature_reader.dim), dtype=np.float32)
        labels = np.zeros((batch_size), dtype=np.int32)
        for j in range(batch_size):
            features[j, :, :], _ = feature_reader.read_segment(
                feature_list[i * batch_size + j], batch_length, shuffle=shuffle)
            labels[j] = features2spk[feature_list[i * batch_size + j]]
        queue.put((features, labels))

    stop_event.set()
    print("The process {} is about to exit.".format(os.getpid()))
    return
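
# A minimal usage sketch (not part of this codebase) of how a consumer such as
# KaldiDataSeqQueue might drive batch_sequence: run it in a background process
# and keep pulling batches until the producer has set stop_event and the queue
# has drained. The names `train_dir`, `feature_list` and `features2spk` are
# hypothetical placeholders here.
import multiprocessing
import queue as queue_module


def consume_batches(train_dir, feature_list, features2spk):
    stop_event = multiprocessing.Event()
    data_queue = multiprocessing.Queue(maxsize=8)
    proc = multiprocessing.Process(
        target=batch_sequence,
        args=(stop_event, data_queue, train_dir, feature_list, features2spk))
    proc.start()
    # Stop only when the producer is done AND everything has been consumed.
    while not (stop_event.is_set() and data_queue.empty()):
        try:
            features, labels = data_queue.get(timeout=1)
        except queue_module.Empty:
            continue
        yield features, labels
    proc.join()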
# Load the pre-trained model into the target model directory.
# The pre-trained model is copied as the fine-tuned model so it can be loaded
# from the new directory; at this point it is just an initialized model.
get_pretrain_model(os.path.join(args.pretrain_model, "nnet"),
                   os.path.join(args.finetune_model, "nnet"),
                   args.checkpoint)

# The model directory always has a folder named nnet.
model_dir = os.path.join(args.finetune_model, "nnet")

# Set the random seed. Random operations may appear in data input, batch forming, etc.
tf.set_random_seed(params.seed)
random.seed(params.seed)
np.random.seed(params.seed)

dim = FeatureReader(args.train_dir).get_dim()
if "selected_dim" in params.dict:
    dim = params.selected_dim
with open(os.path.join(model_dir, "feature_dim"), "w") as f:
    f.write("%d\n" % dim)

num_total_train_speakers = KaldiDataRandomQueue(args.train_dir, args.train_spklist).num_total_speakers
tf.logging.info("There are %d speakers in the training set and the dim is %d" % (num_total_train_speakers, dim))

min_valid_loss = ValidLoss()

# The trainer is used to control the training process.
trainer = Trainer(params, args.finetune_model, dim, num_total_train_speakers)
trainer.build("train")
trainer.build("valid")
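
# A plausible sketch (the actual get_pretrain_model in this codebase may
# differ) of what copying a pre-trained checkpoint into a new model directory
# involves: copy the checkpoint shards and rewrite the `checkpoint`
# bookkeeping file so that TensorFlow's Saver restores from the new location.
# `copy_pretrain_checkpoint` is a hypothetical name for illustration.
import glob
import shutil


def copy_pretrain_checkpoint(pretrain_nnet, finetune_nnet, checkpoint="last"):
    # Resolve which checkpoint to copy; "last" means the latest one recorded
    # in the `checkpoint` file of the pre-trained directory.
    if checkpoint == "last":
        state = tf.train.get_checkpoint_state(pretrain_nnet)
        prefix = state.model_checkpoint_path
    else:
        prefix = os.path.join(pretrain_nnet, "model-%s" % checkpoint)
    if not os.path.isdir(finetune_nnet):
        os.makedirs(finetune_nnet)
    # Copy the .index/.meta/.data-* shards belonging to this checkpoint.
    for filename in glob.glob(prefix + ".*"):
        shutil.copy(filename, finetune_nnet)
    # Rewrite the bookkeeping file to point at the copied checkpoint.
    new_prefix = os.path.join(finetune_nnet, os.path.basename(prefix))
    with open(os.path.join(finetune_nnet, "checkpoint"), "w") as f:
        f.write('model_checkpoint_path: "%s"\n' % new_prefix)
        f.write('all_model_checkpoint_paths: "%s"\n' % new_prefix)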
parser.add_argument("model", type=str, help="The output model directory.") if __name__ == '__main__': tf.logging.set_verbosity(tf.logging.INFO) args = parser.parse_args() params = save_codes_and_config(True, args.model, None) # The model directory always has a folder named nnet model_dir = os.path.join(args.model, "nnet") # Set the random seed. The random operations may appear in data input, batch forming, etc. tf.set_random_seed(params.seed) random.seed(params.seed) np.random.seed(params.seed) dim = FeatureReader(args.data_dir).get_dim() if "selected_dim" in params.dict: dim = params.selected_dim with open(args.data_spklist, 'r') as f: num_total_train_speakers = len(f.readlines()) trainer = Trainer(params, args.model, dim, num_total_train_speakers) trainer.build("valid") valid_loss, valid_embeddings, valid_labels = trainer.insight( args.data_dir, args.data_spklist, batch_type=params.batch_type, output_embeddings=True) eer = compute_cos_pairwise_eer(valid_embeddings, valid_labels) tf.logging.info("EER: %f" % eer) trainer.close()
def batch_random(stop_event,
                 queue,
                 data,
                 spk2features,
                 num_total_speakers,
                 num_speakers=10,
                 num_segments=10,
                 min_len=200,
                 max_len=400,
                 shuffle=True,
                 seed=0):
    """Load features and fill a queue. Used in KaldiDataRandomQueue.

    Args:
        stop_event: An event to tell the process to stop.
        queue: A queue to put the data in.
        data: The Kaldi data directory.
        spk2features: A dict from speaker index to the segments.
        num_total_speakers: The total number of speakers.
        num_speakers: The number of speakers in the batch.
        num_segments: The number of segments per speaker.
        min_len: The minimum length of the features.
        max_len: The maximum length of the features.
        shuffle: If True, load each feature from a random frame rather than frame 0.
        seed: The value used to generate the random seed.
    """
    # TODO: If you use numpy.random in a sub-process, it is better to use:
    #     local_state = np.random.RandomState(seed)
    #     print(local_state.uniform(0, 1, 5))
    # Re-seeding is necessary if numpy.random is used.
    # os.urandom can be used to generate the `random` seed.
    rd = random.Random(os.urandom(4))
    rd.seed(seed)

    feature_reader = FeatureReader(data)
    speakers = list(spk2features.keys())  # e.g. 7323 speakers
    if num_total_speakers < num_speakers:
        print("[Warning] The number of available speakers is less than the requested number. "
              "Some speakers will be duplicated.")
        speakers = speakers * (int(num_speakers / num_total_speakers) + 1)

    # Now we have enough speakers.
    while not stop_event.is_set():
        # Sample the speaker ids for this batch.
        batch_speakers = rd.sample(speakers, num_speakers)
        # Randomly choose a batch length between min_len (200) and max_len (400).
        batch_length = rd.randint(min_len, max_len)
        # (batch_size, frame_length, feat_dim)
        features = np.zeros((num_speakers * num_segments, batch_length, feature_reader.dim),
                            dtype=np.float32)
        # (batch_size,)
        labels = np.zeros((num_speakers * num_segments), dtype=np.int32)
        for i, speaker in enumerate(batch_speakers):
            # The batch length may be larger than the utterance length, so check first.
            feature_list = []
            spk = speaker
            while len(feature_list) == 0:
                feature_list = []
                for feat in spk2features[spk]:
                    if feature_reader.utt2num_frames[feat.split(' ')[0]] > batch_length:
                        feature_list.append(feat)
                if len(feature_list) == 0:
                    # The speaker has no utterance long enough for this batch.
                    # Resample the speaker.
                    spk = rd.choice(list(set(speakers) - set(batch_speakers)))
                    batch_speakers[i] = spk

            labels[i * num_segments:(i + 1) * num_segments] = spk

            # If the speaker does not have enough utterances, duplicate the list.
            if len(feature_list) < num_segments:
                feature_list *= (int(num_segments / len(feature_list)) + 1)

            # The list is now longer than the sample size: pick num_segments
            # utterances from this speaker's feature_list as the speaker features.
            speaker_features = rd.sample(feature_list, num_segments)
            for j, feat in enumerate(speaker_features):
                features[i * num_segments + j, :, :], _ = feature_reader.read_segment(
                    feat, batch_length, shuffle=shuffle)
        queue.put((features, labels))

    time.sleep(3)
    # Drain the queue so the process can exit cleanly.
    while not queue.empty():
        try:
            queue.get(block=False)
        except Exception:
            pass
    print("The process {} is about to exit.".format(os.getpid()))
    return
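
# A hedged sketch (assumed file layout, not necessarily this codebase's exact
# loader) of how the spk2features and features2spk mappings used above could
# be built from a Kaldi data directory: feats.scp holds "utt ark-specifier"
# lines (hence the feat.split(' ')[0] keys above), utt2spk maps utterances to
# speakers, and the spklist file maps speaker names to integer indices.
# `build_speaker_maps` is a hypothetical helper name.
def build_speaker_maps(data_dir, spklist):
    spk2index = {}
    with open(spklist, 'r') as f:
        for line in f:
            spk, index = line.strip().split(' ')
            spk2index[spk] = int(index)

    with open(os.path.join(data_dir, "utt2spk"), 'r') as f:
        utt2spk = dict(line.strip().split(' ') for line in f)

    spk2features = {}
    features2spk = {}
    with open(os.path.join(data_dir, "feats.scp"), 'r') as f:
        for line in f:
            entry = line.strip()
            index = spk2index[utt2spk[entry.split(' ')[0]]]
            spk2features.setdefault(index, []).append(entry)
            features2spk[entry] = index
    return spk2features, features2spk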
import tensorflow as tf

if __name__ == '__main__':
    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.INFO)

    nnet_dir = os.path.join(args.model_dir, "nnet")
    config_json = os.path.join(args.model_dir, "nnet/config.json")
    if not os.path.isfile(config_json):
        sys.exit("Cannot find config.json in %s" % config_json)
    params = Params(config_json)

    # First, we need to extract the weights.
    num_total_train_speakers = KaldiDataRandomQueue(
        os.path.dirname(args.spklist), args.spklist).num_total_speakers
    dim = FeatureReader(os.path.dirname(args.spklist)).get_dim()
    if "selected_dim" in params.dict:
        dim = params.selected_dim

    trainer = Trainer(params, args.model_dir, dim, num_total_train_speakers, single_cpu=True)
    trainer.build("valid")
    trainer.sess.run(tf.global_variables_initializer())
    trainer.sess.run(tf.local_variables_initializer())

    if not args.init:
        curr_step = trainer.load()
    else:
        # Hack:
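
# A minimal sketch (assuming the common JSON-params pattern; this codebase's
# actual Params class may differ) of the Params object used above: it loads
# config.json and exposes the entries both as attributes (params.seed) and
# through a dict view, which is why `"selected_dim" in params.dict` works.
import json


class ParamsSketch(object):
    def __init__(self, json_path):
        with open(json_path, 'r') as f:
            # Every key in the JSON config becomes an attribute.
            self.__dict__.update(json.load(f))

    @property
    def dict(self):
        # Dict-style access, e.g. "selected_dim" in params.dict.
        return self.__dict__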