def eval_model(working_dir: str, model: DeepSpeakerModel):
    enable_deterministic()
    audio = Audio(working_dir)
    batcher = LazyTripletBatcher(working_dir, NUM_FRAMES, model)
    speakers_list = list(audio.speakers_to_utterances.keys())
    num_negative_speakers = 99
    num_speakers = len(speakers_list)
    y_pred = np.zeros(shape=(num_speakers, num_negative_speakers + 1))  # negatives + positive.
    for i, positive_speaker in tqdm(enumerate(speakers_list), desc='test', total=num_speakers):
        # Convention: index 0 is the anchor speaker, index 1 is the positive, indices 2: are the negatives.
        input_data = batcher.get_speaker_verification_data(positive_speaker, num_negative_speakers)
        # The batch size is not relevant here; it only keeps us from pushing too much onto the GPU at once.
        predictions = model.m.predict(input_data, batch_size=BATCH_SIZE)
        anchor_embedding = predictions[0]
        for j, other_than_anchor_embedding in enumerate(predictions[1:]):  # positive + negatives.
            y_pred[i][j] = batch_cosine_similarity([anchor_embedding], [other_than_anchor_embedding])[0]
        # y_pred[i] = softmax(y_pred[i])  # A softmax could be applied here.
    y_true = np.zeros_like(y_pred)  # The positive is at index 0.
    y_true[:, 0] = 1.0
    print(np.matrix(y_true))
    print(np.matrix(y_pred))
    print(np.min(y_pred), np.max(y_pred))
    fm, tpr, acc, eer = evaluate(y_pred, y_true)
    return fm, tpr, acc, eer
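
# A minimal usage sketch for eval_model(). The checkpoint path and working
# directory are assumptions, not paths from this repository; NUM_FRAMES and
# NUM_FBANKS are the project constants used throughout this file.
def _example_eval_model():  # hypothetical helper, for illustration only.
    dsm = DeepSpeakerModel([None, NUM_FRAMES, NUM_FBANKS, 1], include_softmax=False)
    dsm.m.load_weights('checkpoints/ResCNN_checkpoint.h5', by_name=True)  # assumed path.
    fm, tpr, acc, eer = eval_model('/path/to/working_dir', dsm)  # assumed working dir.
    print(f'f-measure={fm:.3f}, tpr={tpr:.3f}, accuracy={acc:.3f}, eer={eer:.3f}')
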
def fit_model(dsm: DeepSpeakerModel, working_dir: str, max_length: int = NUM_FRAMES, batch_size=BATCH_SIZE):
    batcher = LazyTripletBatcher(working_dir, max_length, dsm)

    # Build a small test set.
    test_batches = []
    for _ in tqdm(range(200), desc='Build test set'):
        test_batches.append(batcher.get_batch_test(batch_size))

    def test_generator():
        while True:
            for bb in test_batches:
                yield bb

    def train_generator():
        while True:
            yield batcher.get_random_batch(batch_size, is_test=False)

    checkpoint_name = dsm.m.name + '_checkpoint'
    checkpoint_filename = os.path.join(CHECKPOINTS_TRIPLET_DIR, checkpoint_name + '_{epoch}.h5')
    checkpoint = ModelCheckpoint(monitor='val_loss', filepath=checkpoint_filename, save_best_only=True)
    dsm.m.fit(x=train_generator(), y=None, steps_per_epoch=2000, shuffle=False, epochs=1000,
              validation_data=test_generator(), validation_steps=len(test_batches), callbacks=[checkpoint])
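
# Hypothetical driver for fit_model() above. The compile step mirrors main2()
# below (Adam + deep_speaker_loss); the working directory is an assumption.
def _example_fit_triplet():  # hypothetical helper, for illustration only.
    dsm = DeepSpeakerModel([None, NUM_FRAMES, NUM_FBANKS, 1], include_softmax=False)
    dsm.m.compile(optimizer='adam', loss=deep_speaker_loss)
    fit_model(dsm, working_dir='/path/to/working_dir')  # assumed path.
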
def main2():
    # Sanity-check routine: loads a triplet checkpoint and prints the mean
    # triplet loss over 100 batches drawn from each sampling strategy.
    batch_input_shape = [None, NUM_FRAMES, NUM_FBANKS, 1]
    dsm = DeepSpeakerModel(batch_input_shape, include_softmax=False)
    dsm.m.compile(optimizer='adam', loss=deep_speaker_loss)
    # NOTE: hard-coded local checkpoint path.
    dsm.m.load_weights('/Users/premy/deep-speaker/ResCNN_checkpoint_102.h5', by_name=True)
    dsm.m.summary()
    batcher = LazyTripletBatcher(working_dir='/Users/premy/deep-speaker', max_length=NUM_FRAMES, model=dsm)
    bs = 18
    print(np.mean([dsm.m.evaluate(*batcher.get_batch_train(batch_size=bs), batch_size=bs, verbose=0)
                   for _ in range(100)]))
    print(np.mean([dsm.m.evaluate(*batcher.get_batch_test(batch_size=bs), batch_size=bs, verbose=0)
                   for _ in range(100)]))
    print(np.mean([dsm.m.evaluate(*batcher.get_random_batch(batch_size=bs, is_test=False), batch_size=bs, verbose=0)
                   for _ in range(100)]))
    print(np.mean([dsm.m.evaluate(*batcher.get_random_batch(batch_size=bs, is_test=True), batch_size=bs, verbose=0)
                   for _ in range(100)]))
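
# A small sketch of what the losses printed by main2() are built on: embed two
# utterance windows and compare them with cosine similarity, exactly as
# eval_model() does above. The input shapes are assumptions.
def _example_similarity(dsm: DeepSpeakerModel, mfcc_a: np.ndarray, mfcc_b: np.ndarray) -> float:
    # mfcc_a and mfcc_b are assumed to be (NUM_FRAMES, NUM_FBANKS, 1) feature windows.
    embeddings = dsm.m.predict(np.stack([mfcc_a, mfcc_b]), batch_size=2)
    return batch_cosine_similarity([embeddings[0]], [embeddings[1]])[0]
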
def fit_model(dsm: DeepSpeakerModel, working_dir: str, max_length: int = NUM_FRAMES,
              batch_size=BATCH_SIZE, epochs=1000, classify=False, initial_epoch=0):
    # Extended variant of fit_model() with an optional classification mode; if both
    # definitions live in the same module, this one shadows the one above.
    batcher = LazyTripletBatcher(working_dir, max_length, dsm, classify=classify)

    # Build a small test set.
    test_batches = []
    for _ in tqdm(range(200), desc='Build test set'):
        test_batches.append(batcher.get_batch_test(batch_size, classify=classify))

    def test_generator():
        while True:
            for bb in test_batches:
                yield bb

    def train_generator():
        while True:
            yield batcher.get_random_batch(batch_size, is_test=False, classify=classify)

    checkpoint_name = dsm.m.name + '_checkpoint'
    if classify:
        checkpoint_filename = os.path.join(CHECKPOINTS_CLASSIFY_DIR, checkpoint_name + '_{epoch}.h5')
    else:
        checkpoint_filename = os.path.join(CHECKPOINTS_TRIPLET_DIR, checkpoint_name + '_{epoch}.h5')
    checkpoint = ModelCheckpoint(monitor='val_loss', filepath=checkpoint_filename, save_best_only=True)
    early_stopping = EarlyStopping(monitor='loss', min_delta=0.001, patience=20, verbose=1)
    # fit_generator is deprecated in newer Keras; model.fit accepts generators directly (see the variant above).
    dsm.m.fit_generator(train_generator(), steps_per_epoch=2000, shuffle=False, epochs=epochs,
                        validation_data=test_generator(), validation_steps=len(test_batches),
                        initial_epoch=initial_epoch, callbacks=[checkpoint, early_stopping])

    # Fit an SVM on the embeddings of the training samples and persist it.
    mfccs, y = load_data(os.path.join(working_dir, 'samples/train'))
    features = dsm.m.predict(mfccs)
    clf = svm.SVC()
    clf.fit(features, y)
    with open('svm.pkl', 'wb') as svm_pickle:
        pickle.dump(clf, svm_pickle)
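
# Hedged sketch of consuming the artifacts produced above when classify=True:
# load the pickled SVM ('svm.pkl', written by fit_model) and classify the
# embedding of one new sample. The input shape is an assumption.
def _example_classify(dsm: DeepSpeakerModel, mfcc: np.ndarray):  # hypothetical helper.
    with open('svm.pkl', 'rb') as f:
        clf = pickle.load(f)
    embedding = dsm.m.predict(np.expand_dims(mfcc, axis=0))  # shape (1, embedding_dim).
    return clf.predict(embedding)[0]  # predicted speaker label.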