Example 1

# Third-party imports assumed by the snippets below; the remaining names
# (Audio, LazyTripletBatcher, DeepSpeakerModel, batch_cosine_similarity,
# evaluate, enable_deterministic, deep_speaker_loss, and the NUM_FRAMES /
# NUM_FBANKS / BATCH_SIZE / CHECKPOINTS_TRIPLET_DIR constants) come from the
# deep-speaker codebase.
import os

import numpy as np
from tqdm import tqdm
from tensorflow.keras.callbacks import ModelCheckpoint

def eval_model(working_dir: str, model: DeepSpeakerModel):
    enable_deterministic()
    audio = Audio(working_dir)
    batcher = LazyTripletBatcher(working_dir, NUM_FRAMES, model)
    speakers_list = list(audio.speakers_to_utterances.keys())
    num_negative_speakers = 99
    num_speakers = len(speakers_list)
    y_pred = np.zeros(shape=(num_speakers, num_negative_speakers +
                             1))  # negatives + positive
    for i, positive_speaker in tqdm(enumerate(speakers_list),
                                    desc='test',
                                    total=num_speakers):
        # Convention: index 0 of the batch is the anchor utterance, index 1
        # the positive, indices 2+ the negatives.
        input_data = batcher.get_speaker_verification_data(
            positive_speaker, num_negative_speakers)
        # The batch size does not affect the predictions; it only caps how
        # much is pushed to the GPU at once.
        predictions = model.m.predict(input_data, batch_size=BATCH_SIZE)
        anchor_embedding = predictions[0]
        for j, other_than_anchor_embedding in enumerate(
                predictions[1:]):  # positive + negatives
            y_pred[i][j] = batch_cosine_similarity(
                [anchor_embedding], [other_than_anchor_embedding])[0]
        # y_pred[i] = softmax(y_pred[i])
    # could apply softmax here.
    y_true = np.zeros_like(y_pred)  # positive is at index 0.
    y_true[:, 0] = 1.0
    # Debug output: ground-truth matrix, similarity scores, and score range.
    print(np.matrix(y_true))
    print(np.matrix(y_pred))
    print(np.min(y_pred), np.max(y_pred))
    fm, tpr, acc, eer = evaluate(y_pred, y_true)
    return fm, tpr, acc, eer
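
# Minimal usage sketch for eval_model (the checkpoint file and working
# directory below are hypothetical placeholders):
#
#   model = DeepSpeakerModel([None, NUM_FRAMES, NUM_FBANKS, 1], include_softmax=False)
#   model.m.load_weights('ResCNN_checkpoint.h5', by_name=True)
#   fm, tpr, acc, eer = eval_model('/path/to/working_dir', model)
#   print(f'fm={fm:.3f} tpr={tpr:.3f} acc={acc:.3f} eer={eer:.3f}')
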
def fit_model(dsm: DeepSpeakerModel,
              working_dir: str,
              max_length: int = NUM_FRAMES,
              batch_size=BATCH_SIZE):
    batcher = LazyTripletBatcher(working_dir, max_length, dsm)

    # Build a small, fixed test set so the validation loss is comparable
    # across epochs.
    test_batches = []
    for _ in tqdm(range(200), desc='Build test set'):
        test_batches.append(batcher.get_batch_test(batch_size))

    def test_generator():
        while True:
            for bb in test_batches:
                yield bb

    def train_generator():
        while True:
            yield batcher.get_random_batch(batch_size, is_test=False)

    checkpoint_name = dsm.m.name + '_checkpoint'
    checkpoint_filename = os.path.join(CHECKPOINTS_TRIPLET_DIR,
                                       checkpoint_name + '_{epoch}.h5')
    checkpoint = ModelCheckpoint(monitor='val_loss',
                                 filepath=checkpoint_filename,
                                 save_best_only=True)
    dsm.m.fit(x=train_generator(),
              y=None,
              steps_per_epoch=2000,
              shuffle=False,
              epochs=1000,
              validation_data=test_generator(),
              validation_steps=len(test_batches),
              callbacks=[checkpoint])
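
# Sketch of a triplet-training entry point under the same constants (the
# working directory is a hypothetical placeholder; CHECKPOINTS_TRIPLET_DIR
# must exist before ModelCheckpoint writes into it):
#
#   dsm = DeepSpeakerModel([None, NUM_FRAMES, NUM_FBANKS, 1], include_softmax=False)
#   dsm.m.compile(optimizer='adam', loss=deep_speaker_loss)
#   os.makedirs(CHECKPOINTS_TRIPLET_DIR, exist_ok=True)
#   fit_model(dsm, working_dir='/path/to/working_dir')
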
def main2():
    batch_input_shape = [None, NUM_FRAMES, NUM_FBANKS, 1]
    dsm = DeepSpeakerModel(batch_input_shape, include_softmax=False)
    dsm.m.compile(optimizer='adam', loss=deep_speaker_loss)
    dsm.m.load_weights('/Users/premy/deep-speaker/ResCNN_checkpoint_102.h5',
                       by_name=True)
    dsm.m.summary()
    batcher = LazyTripletBatcher(working_dir='/Users/premy/deep-speaker',
                                 max_length=NUM_FRAMES,
                                 model=dsm)
    bs = 18

    # Mean triplet loss over 100 batches from each sampler: get_batch_train,
    # get_batch_test, then random train/test batches.
    print(
        np.mean([
            dsm.m.evaluate(*batcher.get_batch_train(batch_size=bs),
                           batch_size=bs,
                           verbose=0) for _ in range(100)
        ]))
    print(
        np.mean([
            dsm.m.evaluate(*batcher.get_batch_test(batch_size=bs),
                           batch_size=bs,
                           verbose=0) for _ in range(100)
        ]))
    print(
        np.mean([
            dsm.m.evaluate(*batcher.get_random_batch(batch_size=bs,
                                                     is_test=False),
                           batch_size=bs,
                           verbose=0) for _ in range(100)
        ]))
    print(
        np.mean([
            dsm.m.evaluate(*batcher.get_random_batch(batch_size=bs,
                                                     is_test=True),
                           batch_size=bs,
                           verbose=0) for _ in range(100)
        ]))
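
# A hypothetical entry point for the snippet above:
#
#   if __name__ == '__main__':
#       main2()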

Example 4

# Third-party imports assumed by this variant; load_data, LazyTripletBatcher,
# DeepSpeakerModel, and the CHECKPOINTS_* / NUM_FRAMES / BATCH_SIZE constants
# come from the surrounding codebase.
import os
import pickle

from sklearn import svm
from tqdm import tqdm
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

def fit_model(dsm: DeepSpeakerModel,
              working_dir: str,
              max_length: int = NUM_FRAMES,
              batch_size=BATCH_SIZE,
              epochs=1000,
              classify=False,
              initial_epoch=0):
    batcher = LazyTripletBatcher(working_dir,
                                 max_length,
                                 dsm,
                                 classify=classify)

    # Build a small, fixed test set so the validation loss is comparable
    # across epochs.
    test_batches = []
    for _ in tqdm(range(200), desc='Build test set'):
        test_batches.append(
            batcher.get_batch_test(batch_size, classify=classify))

    def test_generator():
        while True:
            for bb in test_batches:
                yield bb

    def train_generator():
        while True:
            yield batcher.get_random_batch(batch_size,
                                           is_test=False,
                                           classify=classify)

    checkpoint_name = dsm.m.name + '_checkpoint'

    if classify:
        checkpoint_filename = os.path.join(CHECKPOINTS_CLASSIFY_DIR,
                                           checkpoint_name + '_{epoch}.h5')
    else:
        checkpoint_filename = os.path.join(CHECKPOINTS_TRIPLET_DIR,
                                           checkpoint_name + '_{epoch}.h5')

    checkpoint = ModelCheckpoint(monitor='val_loss',
                                 filepath=checkpoint_filename,
                                 save_best_only=True)
    early_stopping = EarlyStopping(monitor='loss',
                                   min_delta=0.001,
                                   patience=20,
                                   verbose=1)

    # fit_generator is deprecated in TF2; Model.fit accepts a generator
    # directly (as in the Example 1 variant above).
    dsm.m.fit_generator(train_generator(),
                        steps_per_epoch=2000,
                        shuffle=False,
                        epochs=epochs,
                        validation_data=test_generator(),
                        validation_steps=len(test_batches),
                        initial_epoch=initial_epoch,
                        callbacks=[checkpoint, early_stopping])

    # Train an SVM speaker classifier on top of the network's output features.
    mfccs, y = load_data(os.path.join(working_dir, 'samples/train'))
    features = dsm.m.predict(mfccs)

    clf = svm.SVC()
    clf.fit(features, y)
    # Persist the trained classifier; the context manager closes the file.
    with open('svm.pkl', 'wb') as svm_pickle:
        pickle.dump(clf, svm_pickle)
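
# The pickled classifier can later be restored and applied to new features;
# a minimal sketch (the test-split path is a hypothetical placeholder):
#
#   with open('svm.pkl', 'rb') as f:
#       clf = pickle.load(f)
#   mfccs, _ = load_data('/path/to/samples/test')
#   predicted_speakers = clf.predict(dsm.m.predict(mfccs))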