Example #1
0
def main():
    """Evaluate the newest ``.h5`` checkpoint on the test set.

    The most recently modified ``.h5`` file in ``c.best_checkpoint_dir`` is
    loaded into a freshly built model, ``test_on_batch`` is run over the
    test loader and the mean of the per-batch accuracies is printed.

    Raises:
        IOError: if the checkpoint directory contains no ``.h5`` file.
    """
    #load the most recent h5
    directory = c.best_checkpoint_dir
    candidates = (os.path.join(directory, name)
                  for name in os.listdir(directory))
    files = sorted(
        (path for path in candidates
         if os.path.isfile(path) and path.endswith(".h5")),
        key=os.path.getmtime)
    if not files:
        # Fail with a readable message instead of an IndexError on files[-1].
        raise IOError("No .h5 checkpoint found in %s" % (directory))
    best_model = files[-1]

    input_shape = (256, 256, 3)
    model = convolutional_model(input_shape, batch_size=c.batch_size)
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.load_weights(best_model)
    # Report only once the weights are really in the model.
    print("Loaded %s" % (best_model))

    test_dataset = load_dataset('test')
    loader_test = loader(test_dataset, c.batch_size)
    # Floor division: range() needs an int, and a trailing partial batch is
    # skipped exactly as integer division always did.
    test_steps = len(test_dataset['label']) // c.batch_size

    accs = []
    for _ in range(test_steps):
        x_test, y_test = loader_test.next()
        _loss, _acc = model.test_on_batch(x_test, y_test)
        accs.append(_acc)
    print(np.mean(np.array(accs)))
Example #2
0
def pred(x):
    """Classify ``x`` with the pre-trained softmax speaker model.

    A convolutional base model is built for the shape of ``x``, a softmax
    layer is stacked on top, and the latest checkpoint from
    ``c.PRE_CHECKPOINT_FOLDER`` is loaded when one exists.

    Args:
        x: batch of input features. ``x.shape[1:]`` is used as the model
            input shape and ``x[0].shape[0]`` as the number of frames.

    Returns:
        tuple: ``(c.DICT[index], index)`` where ``index`` is the argmax over
        the flattened output of ``model.predict_on_batch(x)``.
    """
    batch_size = c.BATCH_SIZE * c.TRIPLET_PER_BATCH
    id_to_labels = c.DICT

    # Width of the softmax layer; it has to match the layer stored in the
    # checkpoint (presumably the speaker count used for pre-training --
    # TODO confirm and derive it from the data instead of hard-coding).
    no_of_speakers = 142

    num_frames = x[0].shape[0]
    base_model = convolutional_model(input_shape=x.shape[1:],
                                     batch_size=batch_size,
                                     num_frames=num_frames)

    y = Dense(no_of_speakers, activation='softmax',
              name='softmax_layer')(base_model.output)
    model = Model(base_model.input, y)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    last_checkpoint = utils.get_last_checkpoint_if_any(c.PRE_CHECKPOINT_FOLDER)
    if last_checkpoint is not None:
        model.load_weights(last_checkpoint)

    # np.argmax without an axis works on the flattened prediction array.
    index = np.argmax(model.predict_on_batch(x))

    return id_to_labels[index], index
Example #3
0
def test_predict_one_file():
    """Embed a single wav file and check that the embedding has 512 columns."""
    wav_path = 'audio/LibriSpeechSamples/train-clean-100/19/198/19-198-0000.wav'
    model = convolutional_model()

    # Load the waveform and scale the samples by 2**15.
    _, samples = read(wav_path)
    samples = samples / (2**15)

    # Turn the waveform into a clipped feature matrix.
    clip = clipped_audio(
        extract_features(samples, target_sample_rate=c.SAMPLE_RATE))

    # Restore trained weights when a checkpoint is available.
    last_checkpoint = get_last_checkpoint_if_any(c.CHECKPOINT_FOLDER)
    if last_checkpoint is not None:
        print(f"Found checkpoint {last_checkpoint}. Resume from here...")
        model.load_weights(last_checkpoint)

    # Prepend a batch axis so the model sees a batch of one.
    batch = clip[None, ...]
    print(batch.shape)

    embedding = model.predict(batch)
    print(embedding.shape)
    assert embedding.shape[1] == 512
Example #4
0
def main(libri_dir=DATASET_DIR):
    """Train the convolutional model on random triplet batches, forever.

    One batch is drawn up front only to discover the input dimensions; the
    model is then built and compiled with ``deep_speaker_loss`` and trained
    with ``train_on_batch`` in an endless loop, printing the loss per step.

    Args:
        libri_dir: directory handed to ``read_librispeech_structure``.
    """
    libri = read_librispeech_structure(libri_dir)
    batch = stochastic_mini_batch(libri, batch_size=BATCH_NUM_TRIPLETS)
    batch_size = BATCH_NUM_TRIPLETS * 3  # A triplet has 3 parts.
    x, _ = batch.to_inputs()
    b = x[0]
    num_frames = b.shape[0]
    print('num_frames = ', num_frames)

    # Frames of every utterance are flattened into the batch axis.
    batch_input_shape = [batch_size * num_frames] + list(b.shape[1:])
    model = convolutional_model(batch_input_shape=batch_input_shape,
                                batch_size=batch_size, num_frames=num_frames)
    model.compile(optimizer='adam', loss=deep_speaker_loss)

    # summary() prints by itself and returns None, so it is not wrapped in
    # print() (that only added a stray "None" line).
    model.summary()
    grad_steps = 0
    orig_time = time()
    while True:
        batch = stochastic_mini_batch(libri, batch_size=BATCH_NUM_TRIPLETS)
        x, _ = batch.to_inputs()

        # output.shape = (3, 383, 32, 32, 3) something like this
        # explanation  = (batch_size, num_frames, width, height, channels)
        # Reshape to exactly the shape the model was built with. The previous
        # code used b.shape[2] for both spatial axes, which is only correct
        # when width == height.
        x = np.reshape(x, tuple(batch_input_shape))

        # we don't need to use the targets y, because we know by the convention that:
        # we have [anchors, positive examples, negative examples]. The loss only uses x and
        # can determine if a sample is an anchor, positive or negative sample.
        stub_targets = np.random.uniform(size=(x.shape[0], 1))
        loss = model.train_on_batch(x, stub_targets)
        print('batch #{0} processed in {1:.2f}s, training loss = {2}.'.format(grad_steps, time() - orig_time, loss))
        grad_steps += 1
        orig_time = time()
Example #5
0
import constants as c
from models import convolutional_model
from utils import get_last_checkpoint_if_any

# Build the model and restore the newest checkpoint when there is one.
model = convolutional_model()
last_checkpoint = get_last_checkpoint_if_any(c.CHECKPOINT_FOLDER)
print(last_checkpoint)
# The helper yields None when no checkpoint exists; load_weights(None) would
# fail, so the freshly initialised model is kept in that case.
if last_checkpoint is not None:
    model.load_weights(last_checkpoint)
Example #6
0
from time import time

import numpy as np

from models import convolutional_model
from pre_process import next_batch
from triplet_loss import deep_speaker_loss
from constants import BATCH_NUM_TRIPLETS

if __name__ == '__main__':
    # Pull one batch just to learn the frame count and per-frame shape.
    b = next_batch()
    num_frames = b.shape[0]
    flat_input_shape = [BATCH_NUM_TRIPLETS * num_frames] + list(b.shape[1:])

    model = convolutional_model(batch_input_shape=flat_input_shape,
                                batch_size=BATCH_NUM_TRIPLETS,
                                num_frames=num_frames)
    model.compile(loss=deep_speaker_loss, optimizer='adam')

    print(model.summary())
    grad_steps, orig_time = 0, time()
    while True:
        # Six consecutive draws, stacked along axis 0 in the order
        # anchor, anchor, positive, positive, negative, negative.
        triplet_parts = [next_batch() for _ in range(6)]
        anc1, anc2, pos1, pos2, neg1, neg2 = triplet_parts
        batch = np.concatenate(triplet_parts, axis=0)
Example #7
0
def main(libri_dir=c.DATASET_DIR):
    """Train the convolutional model with triplet loss, checkpointing each step.

    Reads the dataset structure from ``libri_dir``, builds and compiles the
    model, resumes from the latest checkpoint in ``c.CHECKPOINT_FOLDER`` when
    one exists, then trains forever. After every step the loss is appended to
    ``c.LOSS_FILE`` and the checkpoint folder is emptied and rewritten with
    the current weights.

    Args:
        libri_dir: directory searched for wav files.
    """
    logging.info('Looking for audio [wav] files in {}.'.format(libri_dir))
    libri = read_librispeech_structure(libri_dir)

    if len(libri) == 0:
        logging.warning(
            'Have you converted flac files to wav? If not, run audio/convert_flac_2_wav.sh'
        )
        exit(1)

    # One batch is drawn up front only to discover the input dimensions.
    batch = stochastic_mini_batch(libri, batch_size=c.BATCH_NUM_TRIPLETS)
    batch_size = c.BATCH_NUM_TRIPLETS * 3  # A triplet has 3 parts.
    x, _ = batch.to_inputs()
    b = x[0]
    num_frames = b.shape[0]
    logging.info('num_frames = {}'.format(num_frames))

    # Frames of every utterance are flattened into the batch axis.
    batch_shape = [batch_size * num_frames] + list(b.shape[1:])
    logging.info('batch shape: {}'.format(batch_shape))
    logging.info('batch size: {}'.format(batch_size))
    model = convolutional_model(batch_input_shape=batch_shape,
                                batch_size=batch_size,
                                num_frames=num_frames)
    # summary() prints by itself and returns None; logging its return value
    # only produced a "None" log line.
    model.summary()

    logging.info('Compiling the model...')
    model.compile(optimizer='adam', loss=deep_speaker_loss)
    logging.info('[DONE]')

    grad_steps = 0
    last_checkpoint = get_last_checkpoint_if_any(c.CHECKPOINT_FOLDER)
    if last_checkpoint is not None:
        logging.info('Found checkpoint [{}]. Resume from here...'.format(
            last_checkpoint))
        model.load_weights(last_checkpoint)
        # Checkpoints are written as model_<step>_<loss>.h5 (see the save
        # call at the end of the loop), so the step is the second-to-last
        # '_'-separated field.
        grad_steps = int(last_checkpoint.split('_')[-2])
        logging.info('[DONE]')

    logging.info('Starting training...')
    orig_time = time()

    while True:
        grad_steps += 1
        batch = stochastic_mini_batch(libri, batch_size=c.BATCH_NUM_TRIPLETS)
        x, _ = batch.to_inputs()

        # output.shape = (3, 383, 32, 32, 3) something like this
        # explanation  = (batch_size, num_frames, width, height, channels)
        logging.info('x.shape before reshape: {}'.format(x.shape))
        # Reshape to exactly the shape the model was built with. The previous
        # code used b.shape[2] for both spatial axes, which is only correct
        # when width == height.
        x = np.reshape(x, tuple(batch_shape))
        logging.info('x.shape after  reshape: {}'.format(x.shape))

        # we don't need to use the targets y, because we know by the convention that:
        # we have [anchors, positive examples, negative examples]. The loss only uses x and
        # can determine if a sample is an anchor, positive or negative sample.
        stub_targets = np.random.uniform(size=(x.shape[0], 1))

        logging.info('-' * 80)
        logging.info('== Presenting batch #{0}'.format(grad_steps))
        logging.info(batch.libri_batch)
        loss = model.train_on_batch(x, stub_targets)
        logging.info(
            '== Processed in {0:.2f}s by the network, training loss = {1}.'.
            format(time() - orig_time, loss))
        orig_time = time()

        # record training loss
        with open(c.LOSS_FILE, "a") as f:
            f.write("{0},{1}\n".format(grad_steps, loss))

        # checkpoints are really heavy so let's just keep the last one.
        create_dir_and_delete_content(c.CHECKPOINT_FOLDER)
        model.save_weights('{0}/model_{1}_{2:.5f}.h5'.format(
            c.CHECKPOINT_FOLDER, grad_steps, loss))
Example #8
0
def _prune_best_checkpoints(folder, keep=4):
    """Delete all but ``keep`` of the ``.h5`` files found in ``folder``.

    Files are ordered descending, as strings, by the second-to-last
    dot-separated field of their base name. For names written as
    ``..._{eer:.5f}.h5`` that field is the fractional digits of the EER.
    The last ``keep`` entries of that ordering survive; ``keep`` must be
    positive.
    """
    paths = (os.path.join(folder, name) for name in os.listdir(folder))
    files = sorted(
        (path for path in paths
         if os.path.isfile(path) and path.endswith(".h5")),
        key=lambda path: path.split('/')[-1].split('.')[-2],
        reverse=True)
    for stale in files[:-keep]:
        logging.info("removing old model: {}".format(stale))
        os.remove(stale)


def main(libri_dir=c.DATASET_DIR):
    """Train the convolutional (and optionally GRU) model with triplet loss.

    Steps, in order:
      1. Catalogue the ``.npy`` features in ``libri_dir``; when none are
         found, run ``preprocess_and_save`` once and retry, exiting with
         status 1 if the catalogue is still empty.
      2. Build the model(s) from the shape of one sample batch.
      3. Restore weights: from ``c.PRE_CHECKPOINT_FOLDER`` when
         ``c.PRE_TRAIN`` is set, otherwise from ``c.CHECKPOINT_FOLDER``
         (and ``c.GRU_CHECKPOINT_FOLDER`` when ``c.COMBINE_MODEL`` is set).
      4. Train forever, logging losses, evaluating periodically and saving
         both the latest and the best (lowest EER) weights.

    Args:
        libri_dir: directory containing the pre-computed feature files.
    """
    PRE_TRAIN = c.PRE_TRAIN
    logging.info(
        'Looking for fbank features [.npy] files in {}.'.format(libri_dir))
    libri = data_catalog(libri_dir)

    if len(libri) == 0:
        logging.warning(
            'Cannot find npy files, we will load audio, extract features and save it as npy file'
        )
        logging.warning('Waiting for preprocess...')
        preprocess_and_save(c.WAV_DIR, c.DATASET_DIR)
        libri = data_catalog(libri_dir)
        if len(libri) == 0:
            logging.warning(
                'Have you converted flac files to wav? If not, run audio/convert_flac_2_wav.sh'
            )
            exit(1)
    unique_speakers = libri['speaker_id'].unique()
    spk_utt_dict, unique_speakers = create_dict(libri['filename'].values,
                                                libri['speaker_id'].values,
                                                unique_speakers)
    select_batch.create_data_producer(unique_speakers, spk_utt_dict)

    # One batch is drawn up front only to discover the input dimensions.
    batch = stochastic_mini_batch(libri,
                                  batch_size=c.BATCH_SIZE,
                                  unique_speakers=unique_speakers)
    batch_size = c.BATCH_SIZE * c.TRIPLET_PER_BATCH
    x, y = batch.to_inputs()
    b = x[0]
    num_frames = b.shape[0]
    train_batch_size = batch_size
    input_shape = (num_frames, b.shape[1], b.shape[2])

    logging.info('num_frames = {}'.format(num_frames))
    logging.info('batch size: {}'.format(batch_size))
    logging.info('input shape: {}'.format(input_shape))
    logging.info('x.shape : {}'.format(x.shape))
    orig_time = time()
    model = convolutional_model(input_shape=input_shape,
                                batch_size=batch_size,
                                num_frames=num_frames)
    logging.info(model.summary())
    gru_model = None
    if c.COMBINE_MODEL:
        gru_model = recurrent_model(input_shape=input_shape,
                                    batch_size=batch_size,
                                    num_frames=num_frames)
        logging.info(gru_model.summary())
    grad_steps = 0

    if PRE_TRAIN:
        last_checkpoint = get_last_checkpoint_if_any(c.PRE_CHECKPOINT_FOLDER)
        if last_checkpoint is not None:
            logging.info(
                'Found pre-training checkpoint [{}]. Resume from here...'.
                format(last_checkpoint))
            # The pre-training checkpoint contains an extra softmax layer.
            # Wrap the base model in a temporary classifier so the file can
            # be loaded; the layers shared with `model` receive the weights.
            softmax_out = Dense(len(unique_speakers),
                                activation='softmax',
                                name='softmax_layer')(model.output)
            pre_model = Model(model.input, softmax_out)
            pre_model.load_weights(last_checkpoint)
            # Checkpoint names carry the step as the second-to-last
            # '_'-separated field.
            grad_steps = int(last_checkpoint.split('_')[-2])
            logging.info('Successfully loaded pre-training model')

    else:
        last_checkpoint = get_last_checkpoint_if_any(c.CHECKPOINT_FOLDER)
        if last_checkpoint is not None:
            logging.info('Found checkpoint [{}]. Resume from here...'.format(
                last_checkpoint))
            model.load_weights(last_checkpoint)
            grad_steps = int(last_checkpoint.split('_')[-2])
            logging.info('[DONE]')
        if c.COMBINE_MODEL:
            last_checkpoint = get_last_checkpoint_if_any(
                c.GRU_CHECKPOINT_FOLDER)
            if last_checkpoint is not None:
                logging.info(
                    'Found checkpoint [{}]. Resume from here...'.format(
                        last_checkpoint))
                gru_model.load_weights(last_checkpoint)
                logging.info('[DONE]')

    model.compile(optimizer='adam', loss=deep_speaker_loss)
    if c.COMBINE_MODEL:
        gru_model.compile(optimizer='adam', loss=deep_speaker_loss)
    print("model_build_time", time() - orig_time)
    logging.info('Starting training...')
    # Best EER seen so far; starts above any real value so that the first
    # evaluation always counts as an improvement.
    lasteer = 10
    eer = 1
    while True:
        orig_time = time()
        x, _ = select_batch.best_batch(model, batch_size=c.BATCH_SIZE)
        print("select_batch_time:", time() - orig_time)
        # Stub targets: only their first dimension matters to train_on_batch.
        y = np.random.uniform(size=(x.shape[0], 1))
        logging.info('== Presenting step #{0}'.format(grad_steps))
        orig_time = time()
        loss = model.train_on_batch(x, y)
        logging.info(
            '== Processed in {0:.2f}s by the network, training loss = {1}.'.
            format(time() - orig_time, loss))
        if c.COMBINE_MODEL:
            loss1 = gru_model.train_on_batch(x, y)
            logging.info(
                '== Processed in {0:.2f}s by the gru-network, training loss = {1}.'
                .format(time() - orig_time, loss1))
            with open(c.GRU_CHECKPOINT_FOLDER + '/losses_gru.txt', "a") as f:
                f.write("{0},{1}\n".format(grad_steps, loss1))
        # record training loss
        with open(c.LOSS_LOG, "a") as f:
            f.write("{0},{1}\n".format(grad_steps, loss))
        if (grad_steps) % 10 == 0:
            fm1, tpr1, acc1, eer1, fprr1, tprr1, auc1 = eval_model(
                model,
                train_batch_size,
                test_dir=c.DATASET_DIR,
                check_partial=True,
                gru_model=gru_model)
            # AUC is argument 3; the old format string repeated {2} and so
            # printed the accuracy in the AUC slot.
            logging.info(
                'test training data EER = {0:.3f}, F-measure = {1:.3f}, Accuracy = {2:.3f}, AUC = {3:.3f} '
                .format(eer1, fm1, acc1, auc1))
            with open(c.CHECKPOINT_FOLDER + '/train_acc_eer.txt', "a") as f:
                f.write("{0},{1},{2},{3}\n".format(grad_steps, eer1, fm1,
                                                   acc1))

        if (grad_steps) % c.TEST_PER_EPOCHS == 0:
            fm, tpr, acc, eer, fprr, tprr, auc = eval_model(
                model,
                train_batch_size,
                test_dir=c.TEST_DIR,
                gru_model=gru_model)
            logging.info(
                '== Testing model after batch #{0}'.format(grad_steps))
            # Same fix as above: AUC is argument 3, not 2.
            logging.info(
                'EER = {0:.3f}, F-measure = {1:.3f}, Accuracy = {2:.3f}, AUC = {3:.3f} '
                .format(eer, fm, acc, auc))
            with open(c.TEST_LOG, "a") as f:
                f.write("{0},{1},{2},{3}\n".format(grad_steps, eer, fm, acc))

        # checkpoints are really heavy so let's just keep the last one.
        if (grad_steps) % c.SAVE_PER_EPOCHS == 0:
            create_dir_and_delete_content(c.CHECKPOINT_FOLDER)
            model.save_weights('{0}/model_{1}_{2:.5f}.h5'.format(
                c.CHECKPOINT_FOLDER, grad_steps, loss))
            if c.COMBINE_MODEL:
                gru_model.save_weights('{0}/grumodel_{1}_{2:.5f}.h5'.format(
                    c.GRU_CHECKPOINT_FOLDER, grad_steps, loss1))
            if eer < lasteer:
                # New best EER: thin out the best-checkpoint folder, then
                # add the current weights to it.
                lasteer = eer
                _prune_best_checkpoints(c.BEST_CHECKPOINT_FOLDER)
                model.save_weights(
                    c.BEST_CHECKPOINT_FOLDER +
                    '/best_model{0}_{1:.5f}.h5'.format(grad_steps, eer))
                if c.COMBINE_MODEL:
                    # Pruned again because the folder now also holds the
                    # file written just above.
                    _prune_best_checkpoints(c.BEST_CHECKPOINT_FOLDER)
                    gru_model.save_weights(c.BEST_CHECKPOINT_FOLDER +
                                           '/best_gru_model{0}_{1:.5f}.h5'.
                                           format(grad_steps, eer))

        grad_steps += 1
Example #9
0
def predict():
    """Label every test file marked "other" by similarity to known speakers.

    Embeddings are computed for all test files in one batch. For each file
    whose label is "other", the mean similarity to every speaker's reference
    files is computed and the file is assigned to the speaker with the
    highest mean; the label falls back to "other" when that mean is below
    0.6. The resulting labels are written with ``write_csv``.

    Reads the module-level ``old_file_name`` list for reporting and output.
    """
    global old_file_name
    classes_list, labels_list, files_list, indexs_list = extract_test_audio(
        False)

    x = np.array([clipped_audio(np.load(file)) for file in files_list])

    model = convolutional_model()
    model.compile(optimizer='adam', loss=deep_speaker_loss)

    # The checkpoint path used to be looked up but never loaded, so the
    # embeddings came from an untrained network.
    last_checkpoint = get_last_checkpoint_if_any(c.CHECKPOINT_FOLDER)
    if last_checkpoint is not None:
        model.load_weights(last_checkpoint)
    else:
        print("No checkpoint found in {}; predicting with untrained weights".
              format(c.CHECKPOINT_FOLDER))

    embeddings_list = model.predict_on_batch(x)

    # sims[i][j]: similarity between files i and j (presumably cosine, going
    # by the helper's name).
    sims = matrix_cosine_similarity(embeddings_list, embeddings_list)
    L = len(files_list)
    for i in range(L):
        if labels_list[i] == "other":
            cosine_max = -1
            # Per speaker: how many reference files score above 0.9 with i.
            error_label = {}
            for speaker in classes_list:
                average_cosine = 0
                error_label[speaker] = 0
                for index in indexs_list[speaker]:
                    average_cosine += sims[i][index]
                    if sims[i][index] > 0.9:
                        error_label[speaker] += 1
                # Non-positive sums are clamped to 0, which also avoids a
                # division by zero for speakers without reference files.
                if average_cosine <= 0:
                    average_cosine = 0
                else:
                    average_cosine /= len(indexs_list[speaker])
                if average_cosine > cosine_max:
                    if labels_list[i] == "other":
                        indexs_list[speaker].append(i)
                    else:
                        # i was already attached to an earlier candidate in
                        # this pass: move it to the better one.
                        del indexs_list[labels_list[i]][-1]
                        indexs_list[speaker].append(i)
                    labels_list[i] = speaker
                    cosine_max = average_cosine
            for speaker in classes_list:
                print("file_name: {} - speaker:{} - err:{}".format(
                    old_file_name[i], speaker, error_label[speaker]))

            # Best match too weak: report the file as unknown.
            # NOTE(review): i stays in indexs_list of the rejected speaker --
            # confirm whether that is intended.
            if cosine_max < 0.6:
                labels_list[i] = "other"

    write_csv(old_file_name, labels_list)
def main():
    """Pre-train the speaker classifier (conv base model + softmax layer).

    Builds a label <-> id mapping from the catalogue at ``c.DATASET_DIR``,
    splits the files with ``split_data``, stacks a softmax layer on
    ``convolutional_model`` and runs ``train_on_batch`` for ``Num_Iter``
    iterations. Train/test metrics are appended to text files inside
    ``c.PRE_CHECKPOINT_FOLDER`` and weights are saved there periodically.
    """
    batch_size = c.BATCH_SIZE * c.TRIPLET_PER_BATCH
    # train_path = "/Users/walle/PycharmProjects/Speech/coding/deep-speaker-master/audio/LibriSpeechSamples/train-clean-100"
    train_path = c.DATASET_DIR

    libri = data_catalog(train_path)
    files = list(libri['filename'])
    labels1 = list(libri['speaker_id'])

    # Map each distinct speaker label to a consecutive integer id and back.
    labels_to_id = {}
    id_to_labels = {}
    i = 0

    for label in np.unique(labels1):
        labels_to_id[label] = i
        id_to_labels[i] = label
        i += 1

    no_of_speakers = len(np.unique(labels1))

    train_data, test_data = split_data(files, labels1, batch_size)
    batchloader = batchTrainingImageLoader(train_data,
                                           labels_to_id,
                                           no_of_speakers,
                                           batch_size=batch_size)
    testloader = batchTestImageLoader(test_data,
                                      labels_to_id,
                                      no_of_speakers,
                                      batch_size=batch_size)
    test_steps = int(len(test_data) / batch_size)
    # One test batch is fetched ahead of time: it provides the input shape
    # for the model and is also the first batch evaluated in the test loop.
    x_test, y_test = testloader.__next__()
    b = x_test[0]
    num_frames = b.shape[0]
    logging.info('num_frames = {}'.format(num_frames))
    logging.info('batch size: {}'.format(batch_size))
    logging.info("x_shape:{0}, y_shape:{1}".format(x_test.shape, y_test.shape))

    base_model = convolutional_model(input_shape=x_test.shape[1:],
                                     batch_size=batch_size,
                                     num_frames=num_frames)
    # Classification layer on top of the base model: one unit per speaker.
    x = base_model.output
    x = Dense(no_of_speakers, activation='softmax', name='softmax_layer')(x)

    model = Model(base_model.input, x)
    logging.info(model.summary())
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print("printing format per batch:", model.metrics_names)
    # y_ = np.argmax(y_train, axis=0)
    # class_weights = sklearn.utils.class_weight.compute_class_weight('balanced', np.unique(y_), y_)

    grad_steps = 0
    last_checkpoint = utils.get_last_checkpoint_if_any(c.PRE_CHECKPOINT_FOLDER)
    # NOTE(review): the next line discards the checkpoint found above, so the
    # resume branch below never runs and training always starts from step 0.
    # Confirm whether resuming was disabled on purpose.
    last_checkpoint = None
    if last_checkpoint is not None:
        logging.info('Found checkpoint [{}]. Resume from here...'.format(
            last_checkpoint))
        model.load_weights(last_checkpoint)
        grad_steps = int(last_checkpoint.split('_')[-2])
        logging.info('[DONE]')

    orig_time = time()

    Num_Iter = 100000
    current_iter = 0
    while current_iter < Num_Iter:
        current_iter += 1
        orig_time = time()
        x_train, y_train = batchloader.__next__()
        [loss, acc] = model.train_on_batch(x_train,
                                           y_train)  # return [loss, acc]
        logging.info(
            'Train Steps:{0}, Time:{1:.2f}s, Loss={2}, Accuracy={3}'.format(
                grad_steps,
                time() - orig_time, loss, acc))

        with open(c.PRE_CHECKPOINT_FOLDER + "/train_loss_acc.txt", "a") as f:
            f.write("{0},{1},{2}\n".format(grad_steps, loss, acc))

        if grad_steps % c.TEST_PER_EPOCHS == 0:
            losses = []
            accs = []
            for ss in range(test_steps):
                # Evaluate the batch fetched previously, then fetch the next
                # one; the last batch fetched is kept for the next test round.
                [loss, acc] = model.test_on_batch(x_test, y_test)
                x_test, y_test = testloader.__next__()
                losses.append(loss)
                accs.append(acc)
            # From here on loss/acc hold the test means, not the training
            # values, for the rest of this iteration.
            loss = np.mean(np.array(losses))
            acc = np.mean(np.array(accs))
            print("loss", loss, "acc", acc)
            logging.info(
                'Test the Data ---------- Steps:{0}, Loss={1}, Accuracy={2}, '.
                format(grad_steps, loss, acc))
            with open(c.PRE_CHECKPOINT_FOLDER + "/test_loss_acc.txt",
                      "a") as f:
                f.write("{0},{1},{2}\n".format(grad_steps, loss, acc))

        if grad_steps % c.SAVE_PER_EPOCHS == 0:
            # NOTE(review): the helper name suggests the folder is emptied
            # first, which would also remove the *_loss_acc.txt logs written
            # above -- confirm.
            # The loss in the file name is the test mean when the test block
            # ran in this iteration, otherwise the training loss.
            utils.create_dir_and_delete_content(c.PRE_CHECKPOINT_FOLDER)
            model.save_weights('{0}/model_{1}_{2:.5f}.h5'.format(
                c.PRE_CHECKPOINT_FOLDER, grad_steps, loss))

        grad_steps += 1
Example #11
0
def main():
    """Train the binary classifier for ``num_epoches`` epochs with validation.

    Loads the 'train' and 'valid' datasets, builds and compiles the model,
    resumes from the latest checkpoint in ``c.checkpoint_folder`` when one
    exists, then trains batch by batch. Every ``c.test_per_epoches`` steps
    the validation set is evaluated; on those steps the latest weights may be
    saved (every ``c.save_per_epoches`` steps) and the weights with the best
    validation accuracy are stored in ``c.best_checkpoint_dir``.
    """
    num_epoches = 200
    #1. first load data
    train_dataset = load_dataset('train')
    valid_dataset = load_dataset('valid')
    loader_train = loader(train_dataset, c.batch_size)
    loader_valid = loader(valid_dataset, c.batch_size)
    # Floor division: both counts feed range(), which rejects floats. A
    # trailing partial batch is skipped, as integer division always did.
    test_steps = len(valid_dataset['label']) // c.batch_size
    steps_per_epoch = len(train_dataset['label']) // c.batch_size
    print(len(train_dataset['label']))
    logging.info("training %d valid %d" %
                 (len(train_dataset['label']), len(valid_dataset['label'])))
    #2. then load model
    input_shape = (256, 256, 3)
    model = convolutional_model(input_shape, batch_size=c.batch_size)

    opt = optimizers.Adam(lr=0.0001)
    model.compile(optimizer=opt,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    grad_steps = 0
    best_acc = 0
    last_checkpoint = get_last_checkpoint_if_any(c.checkpoint_folder)
    print(last_checkpoint)
    if last_checkpoint is not None:
        logging.info('Found checkpoint [{}]. Resume from here...'.format(
            last_checkpoint))
        print('loadding checkpoing %s' % (last_checkpoint))
        model.load_weights(last_checkpoint)
        # Checkpoints are written as model_<step>_<loss>.h5, so the step is
        # the second-to-last '_'-separated field.
        grad_steps = int(last_checkpoint.split('_')[-2])
        logging.info('[DONE]')

    for i in range(num_epoches):
        print("Epoch %d" % (i))
        for _ in range(steps_per_epoch):
            orig_time = time()
            x_train, y_train = loader_train.next()
            [loss, acc] = model.train_on_batch(x_train,
                                               y_train)  # return [loss, acc]
            logging.info(
                'Train Steps:{0}, Time:{1:.2f}s, Loss={2}, Accuracy={3}'.
                format(grad_steps,
                       time() - orig_time, loss, acc))
            if (grad_steps % 100 == 0):
                print(
                    "Training epoch   [%d] steps  [%d]    acc [%f]      loss [%f]"
                    % (i, grad_steps, acc, loss))
            with open(c.checkpoint_folder + "/train_loss_acc.txt", "a") as f:
                f.write("{0},{1},{2}\n".format(grad_steps, loss, acc))

            if grad_steps % c.test_per_epoches == 0:
                losses = []
                accs = []
                for _ss in range(test_steps):
                    x_valid, y_valid = loader_valid.next()
                    [batch_loss, batch_acc] = model.test_on_batch(
                        x_valid, y_valid)
                    losses.append(batch_loss)
                    accs.append(batch_acc)
                valid_loss = np.mean(np.array(losses))
                valid_acc = np.mean(np.array(accs))
                print("Test at epoch    ", i, "steps    ", grad_steps,
                      "avg loss   ", valid_loss, "avg acc   ", valid_acc)
                logging.info(
                    'Test the Data ---------- Steps:{0}, Loss={1}, Accuracy={2}, '
                    .format(grad_steps, valid_loss, valid_acc))
                with open(c.checkpoint_folder + "/test_loss_acc.txt",
                          "a") as f:
                    f.write("{0},{1},{2}\n".format(grad_steps, valid_loss,
                                                   valid_acc))

                # Saved only after the log file above is closed: the helper
                # works on the same folder the log lives in, and touching a
                # still-open file fails on some platforms. The file name
                # carries the validation loss, as before.
                if grad_steps % c.save_per_epoches == 0:
                    create_dir_and_delete_content(c.checkpoint_folder)
                    model.save_weights('{0}/model_{1}_{2:.5f}.h5'.format(
                        c.checkpoint_folder, grad_steps, valid_loss))

                # Save the best one
                if valid_acc > best_acc:
                    best_acc = valid_acc
                    create_dir_and_delete_content(c.best_checkpoint_dir)
                    model.save_weights(
                        c.best_checkpoint_dir +
                        '/best_model{0}_{1:.5f}.h5'.format(grad_steps,
                                                           valid_acc))
            grad_steps += 1
Example #12
0
import numpy as np

from constants import BATCH_NUM_TRIPLETS, NUM_FRAMES
from models import convolutional_model


def normalize_frames(m, epsilon=1e-12):
    """Standardise every frame of ``m`` to zero mean and unit variance.

    Args:
        m: iterable of numeric arrays (one per frame).
        epsilon: lower bound applied to each frame's standard deviation so
            that a constant frame maps to zeros instead of NaN/inf from a
            division by zero.

    Returns:
        list: one normalised array per input frame, in input order.
    """
    return [(v - np.mean(v)) / max(np.std(v), epsilon) for v in m]


if __name__ == '__main__':
    # Smoke run: random inputs through predict and one training step.
    frame_shape = (16, 16, 1)
    flat_batch = BATCH_NUM_TRIPLETS * NUM_FRAMES

    network_inputs = np.random.uniform(
        size=(BATCH_NUM_TRIPLETS, NUM_FRAMES) + frame_shape)

    model = convolutional_model(batch_input_shape=(flat_batch,) + frame_shape)

    from triplet_loss import deep_speaker_loss
    model.compile(loss=deep_speaker_loss,
                  metrics=['accuracy'],
                  optimizer='adam')

    # Fold the frame axis into the batch axis before feeding the network.
    network_inputs = np.reshape(network_inputs, (-1,) + frame_shape)

    output = model.predict(network_inputs)

    # Random stand-in targets with 512 columns, one row per flattened frame.
    stub_targets = np.random.uniform(size=(flat_batch, 512))
    print(model.train_on_batch(network_inputs, stub_targets))
def _prune_best_checkpoints(folder, keep=4):
    """Delete all but ``keep`` of the ``.h5`` files located directly in ``folder``.

    Files are ranked by the text between the last two dots of the file name.
    For the ``best_*{step}_{eer:.5f}.h5`` names written by ``main`` that text
    is the fractional digits of the EER, so the files with the smallest
    values are the ones that survive.
    """
    paths = (os.path.join(folder, name) for name in os.listdir(folder))
    ranked = sorted(
        (p for p in paths if os.path.isfile(p) and p.endswith(".h5")),
        key=lambda p: os.path.basename(p).split('.')[-2],
        reverse=True)
    for stale in ranked[:max(len(ranked) - keep, 0)]:
        logging.info("removing old model: {}".format(stale))
        os.remove(stale)


def main(libri_dir=c.DATASET_DIR, max_steps=2001):
    """Train the convolutional (and optionally the GRU) speaker model.

    The function catalogues the feature files, builds the model(s), resumes
    from the latest checkpoint when one exists, then trains one batch per
    step until ``max_steps`` is reached. Losses, periodic evaluation results,
    checkpoints and per-step timings are written to the locations configured
    in ``c``; a timing report is written to the working directory at the end.

    Args:
        libri_dir: Directory searched for the ``.npy`` feature files.
        max_steps: Training stops once the step counter reaches this value.
            The default of 2001 matches the previously hard-coded limit.
    """
    # ``exit`` is only injected by the ``site`` module; use sys.exit instead.
    import sys

    logging.info('Looking for fbank features [.npy] files in {}.'.format(libri_dir))
    libri = data_catalog(libri_dir)

    if len(libri) == 0:
        logging.warning('Cannot find npy files, we will load audio, extract features and save it as npy file')
        logging.warning('Waiting for preprocess...')
        # NOTE(review): features are written to c.DATASET_DIR but re-read from
        # libri_dir; the two only agree for the default argument - confirm.
        preprocess_and_save(c.WAV_DIR, c.DATASET_DIR)
        libri = data_catalog(libri_dir)
        if len(libri) == 0:
            logging.warning('Have you converted flac files to wav? If not, run audio/convert_flac_2_wav.sh')
            sys.exit(1)

    unique_speakers = libri['speaker_id'].unique()
    spk_utt_dict, unique_speakers = create_dict(libri['filename'].values, libri['speaker_id'].values, unique_speakers)
    select_batch.create_data_producer(unique_speakers, spk_utt_dict)

    # Draw one batch only to discover the input dimensions.
    batch = stochastic_mini_batch(libri, batch_size=c.BATCH_SIZE, unique_speakers=unique_speakers)
    batch_size = c.BATCH_SIZE * c.TRIPLET_PER_BATCH
    x, _ = batch.to_inputs()
    b = x[0]
    num_frames = b.shape[0]
    input_shape = (num_frames, b.shape[1], b.shape[2])

    logging.info('num_frames = {}'.format(num_frames))
    logging.info('batch size: {}'.format(batch_size))
    logging.info('input shape: {}'.format(input_shape))
    # x.shape is expected to be (batchsize, num_frames, 64, 1).
    logging.info('x.shape : {}'.format(x.shape))

    build_start = time()
    model = convolutional_model(input_shape=input_shape, batch_size=batch_size, num_frames=num_frames)
    # summary() returns None, so route its lines through the logger instead
    # of logging its return value.
    model.summary(print_fn=logging.info)
    gru_model = None
    if c.COMBINE_MODEL:
        gru_model = recurrent_model(input_shape=input_shape, batch_size=batch_size, num_frames=num_frames)
        gru_model.summary(print_fn=logging.info)
    grad_steps = 0

    if c.PRE_TRAIN:
        last_checkpoint = get_last_checkpoint_if_any(c.PRE_CHECKPOINT_FOLDER)
        if last_checkpoint is not None:
            logging.info('Found pre-training checkpoint [{}]. Resume from here...'.format(last_checkpoint))
            # Rebuild the softmax-headed classifier on top of ``model`` so the
            # pre-training weights can be loaded through it.
            head = Dense(len(unique_speakers), activation='softmax', name='softmax_layer')(model.output)
            pre_model = Model(model.input, head)
            pre_model.load_weights(last_checkpoint)
            # The step is the second-to-last '_'-separated field of the name.
            grad_steps = int(last_checkpoint.split('_')[-2])
            logging.info('Successfully loaded pre-training model')
    else:
        last_checkpoint = get_last_checkpoint_if_any(c.CHECKPOINT_FOLDER)
        if last_checkpoint is not None:
            logging.info('Found checkpoint [{}]. Resume from here...'.format(last_checkpoint))
            model.load_weights(last_checkpoint)
            grad_steps = int(last_checkpoint.split('_')[-2])
            logging.info('[DONE]')
        if c.COMBINE_MODEL:
            last_checkpoint = get_last_checkpoint_if_any(c.GRU_CHECKPOINT_FOLDER)
            if last_checkpoint is not None:
                logging.info('Found checkpoint [{}]. Resume from here...'.format(last_checkpoint))
                gru_model.load_weights(last_checkpoint)
                logging.info('[DONE]')

    model.compile(optimizer='adam', loss=deep_speaker_loss)
    if c.COMBINE_MODEL:
        gru_model.compile(optimizer='adam', loss=deep_speaker_loss)
    print("model_build_time", time() - build_start)
    logging.info('Starting training...')

    lasteer = 10  # best (lowest) EER saved so far
    eer = 1  # most recent test EER; refreshed every c.TEST_PER_EPOCHS steps
    train_times = []  # one timing record per step run in this session
    total_times = 0  # accumulated pure training time in seconds

    os.makedirs(c.BEST_CHECKPOINT_FOLDER, exist_ok=True)
    while grad_steps < max_steps:
        select_start = time()
        x, _ = select_batch.best_batch(model, batch_size=c.BATCH_SIZE)
        print("select_batch_time:", time() - select_start)
        # Random placeholder targets. If Keras raises "ValueError: Error when
        # checking target: expected ln to have shape (None, 512) but got array
        # with shape (96, 1)", use size=(x.shape[0], 512) instead.
        y = np.random.uniform(size=(x.shape[0], 1))
        logging.info('== Presenting step #{0}'.format(grad_steps))

        # Time only the training call itself.
        begin_time = time()
        loss = model.train_on_batch(x, y)
        train_end_time = time()
        logging.info('== Processed in {0:.2f}s by the network, training loss = {1}.'.format(train_end_time - begin_time, loss))

        if c.COMBINE_MODEL:
            # Separate timer so the GRU figure excludes the CNN step above.
            gru_start = time()
            loss1 = gru_model.train_on_batch(x, y)
            logging.info( '== Processed in {0:.2f}s by the gru-network, training loss = {1}.'.format(time() - gru_start, loss1))
            with open(c.GRU_CHECKPOINT_FOLDER + '/losses_gru.txt', "a") as f:
                f.write("{0},{1}\n".format(grad_steps, loss1))

        # Record the training loss.
        with open(c.LOSS_LOG, "a") as f:
            f.write("{0},{1}\n".format(grad_steps, loss))

        if grad_steps % 10 == 0:
            fm1, _tpr1, acc1, eer1 = eval_model(model, batch_size, test_dir=c.DATASET_DIR, check_partial=True, gru_model=gru_model)
            logging.info('test training data EER = {0:.3f}, F-measure = {1:.3f}, Accuracy = {2:.3f} '.format(eer1, fm1, acc1))
            with open(c.CHECKPOINT_FOLDER + '/train_acc_eer.txt', "a") as f:
                f.write("{0},{1},{2},{3}\n".format(grad_steps, eer1, fm1, acc1))

        if grad_steps % c.TEST_PER_EPOCHS == 0:
            fm, _tpr, acc, eer = eval_model(model, batch_size, test_dir=c.TEST_DIR, gru_model=gru_model)
            logging.info('== Testing model after batch #{0}'.format(grad_steps))
            logging.info('EER = {0:.3f}, F-measure = {1:.3f}, Accuracy = {2:.3f} '.format(eer, fm, acc))
            with open(c.TEST_LOG, "a") as f:
                f.write("{0},{1},{2},{3}\n".format(grad_steps, eer, fm, acc))

        # Checkpoints are really heavy so let's just keep the last one.
        if grad_steps % c.SAVE_PER_EPOCHS == 0:
            create_dir_and_delete_content(c.CHECKPOINT_FOLDER)
            model.save_weights('{0}/model_{1}_{2:.5f}.h5'.format(c.CHECKPOINT_FOLDER, grad_steps, loss))
            if c.COMBINE_MODEL:
                gru_model.save_weights('{0}/grumodel_{1}_{2:.5f}.h5'.format(c.GRU_CHECKPOINT_FOLDER, grad_steps, loss1))
            # ``eer`` is the latest test result, which may predate this step.
            if eer < lasteer:
                lasteer = eer
                _prune_best_checkpoints(c.BEST_CHECKPOINT_FOLDER)
                model.save_weights(c.BEST_CHECKPOINT_FOLDER+'/best_model{0}_{1:.5f}.h5'.format(grad_steps, eer))
                if c.COMBINE_MODEL:
                    # Both models share one folder, so prune again before
                    # adding the GRU weights.
                    _prune_best_checkpoints(c.BEST_CHECKPOINT_FOLDER)
                    gru_model.save_weights(c.BEST_CHECKPOINT_FOLDER+'/best_gru_model{0}_{1:.5f}.h5'.format(grad_steps, eer))

        grad_steps += 1
        end_time = time()
        step_seconds = train_end_time - begin_time
        total_times += step_seconds
        train_times.append(str(begin_time) + '_' + str(train_end_time) + '_' + str(end_time) + '_' + str(step_seconds))
        print("步数:{},耗时:{}s".format(grad_steps, str(step_seconds)))

    # Write the timing report. Average over the steps actually run in this
    # session: after resuming from a checkpoint grad_steps also counts steps
    # from earlier runs whose time is not part of total_times.
    steps_run = len(train_times)
    average_seconds = total_times / steps_run if steps_run else 0.0
    with open('DeepSpeaker_epoch10_spk{}_suttperspk{}_迭代耗时.txt'.format(str(c.Spk_num), str(c.UttPerSpk)), mode='w', encoding='utf-8') as wf:
        wf.write("步数{}_平均每次训练耗时:{}\n".format(grad_steps, average_seconds))
        wf.write("开始训练时间_结束训练时间_结束步数训练时间(包括验证读写文件等)_耗时(结束训练时间-开始训练时间)\n")
        for line in train_times:
            wf.write(line + '\n')
Example #14
0
def main():
    """Train the convolutional speaker model on random batches, indefinitely.

    The function catalogues the feature files under ``c.DATASET_DIR``, builds
    the model, optionally loads pre-training or regular checkpoint weights,
    then trains one random batch per step in an endless loop. Every step
    appends the loss to ``c.LOSS_LOG``; every ``c.SAVE_PER_EPOCHS`` steps the
    latest weights are saved and, when the loss improved, a "best" copy is
    stored in ``c.BEST_CHECKPOINT_FOLDER``.
    """
    print('Looking for fbank features [.npy] files in {}.'.format(
        c.DATASET_DIR))
    libri = data_catalog(c.DATASET_DIR)

    unique_speakers = libri['speaker'].unique()
    speaker_utterance_dict, unique_speakers = create_dict(
        libri['filename'].values, libri['speaker'].values, unique_speakers)
    select_batch.create_data_producer(unique_speakers, speaker_utterance_dict)

    build_start = time()
    model = convolutional_model(input_shape=c.INPUT_SHAPE,
                                batch_size=c.BATCH_SIZE,
                                num_frames=c.NUM_FRAMES)
    # summary() prints by itself and returns None; wrapping it in print()
    # only added a stray "None" line.
    model.summary()
    grad_steps = 0
    if c.PRE_TRAIN:
        last_checkpoint = get_last_checkpoint_if_any(c.PRE_CHECKPOINT_FOLDER)
        if last_checkpoint is not None:
            print('Found pre-training checkpoint [{}]. Resume from here...'.
                  format(last_checkpoint))
            # Rebuild the softmax-headed classifier on top of ``model`` so the
            # pre-training weights can be loaded through it.
            head = Dense(len(unique_speakers),
                         activation='softmax',
                         name='softmax_layer')(model.output)
            pre_model = Model(model.input, head)
            pre_model.load_weights(last_checkpoint)
            # The step counter deliberately stays at 0 here: the step parsed
            # from the checkpoint name used to be overwritten straight away.
            print('Successfully loaded pre-training model')
    else:
        last_checkpoint = get_last_checkpoint_if_any(c.CHECKPOINT_FOLDER)
        if last_checkpoint is not None:
            print('Found checkpoint [{}]. Resume from here...'.format(
                last_checkpoint))
            model.load_weights(last_checkpoint)
            # The step is the second-to-last '_'-separated field of the name.
            grad_steps = int(last_checkpoint.split('_')[-2])
            print('[DONE]')

    adam = Adam(lr=0.001,
                beta_1=0.9,
                beta_2=0.999,
                epsilon=None,
                decay=0.0,
                amsgrad=False)
    # Pass the configured instance; the string 'adam' left it unused.
    model.compile(optimizer=adam, loss=deep_speaker_loss)

    print("model_build_time", time() - build_start)
    print('Starting training...')
    last_loss = 10  # lowest loss stored as a "best" checkpoint so far
    # os.listdir below fails if the folder has never been created.
    os.makedirs(c.BEST_CHECKPOINT_FOLDER, exist_ok=True)
    while True:
        x, y = random_batch(libri, c.BATCH_SIZE)
        print('== Presenting step #{0}'.format(grad_steps))
        step_start = time()
        loss = model.train_on_batch(x, y)
        print('== Processed in {0:.2f}s by the network, training loss = {1}.'.
              format(time() - step_start, loss))

        # Record the training loss.
        with open(c.LOSS_LOG, "a") as f:
            f.write("{0},{1}\n".format(grad_steps, loss))

        # Checkpoints are really heavy so let's just keep the last one.
        if grad_steps % c.SAVE_PER_EPOCHS == 0:
            create_dir_and_delete_content(c.CHECKPOINT_FOLDER)
            model.save_weights('{0}/model_{1}_{2:.5f}.h5'.format(
                c.CHECKPOINT_FOLDER, grad_steps, loss))
            if loss < last_loss:
                last_loss = loss
                # Rank the stored best models by the text between the last
                # two dots of the name (the fractional digits of the loss for
                # the names written below), then drop all but the last four.
                candidates = (
                    os.path.join(c.BEST_CHECKPOINT_FOLDER, name)
                    for name in os.listdir(c.BEST_CHECKPOINT_FOLDER))
                ranked = sorted(
                    (p for p in candidates
                     if os.path.isfile(p) and p.endswith(".h5")),
                    key=lambda p: os.path.basename(p).split('.')[-2],
                    reverse=True)
                for stale in ranked[:-4]:
                    print("removing old model: {}".format(stale))
                    os.remove(stale)
                model.save_weights(
                    c.BEST_CHECKPOINT_FOLDER +
                    '/best_model{0}_{1:.5f}.h5'.format(grad_steps, loss))

        grad_steps += 1