Example #1
def main():

    # gpu configuration
    toolkits.initialize_GPU(args)

    import model
    # ==================================
    #       Get Train/Val.
    # ==================================
    print('==> calculating test({}) data lists...'.format(args.test_type))

    trnlist, trnlb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/voxlb2_train.txt')
    vallist, vallb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/voxlb2_val.txt')

    if args.test_type == 'normal':
        verify_list = np.loadtxt('../meta/voxceleb1_veri_test.txt', str)
    elif args.test_type == 'hard':
        verify_list = np.loadtxt('../meta/voxceleb1_veri_test_hard.txt', str)
    elif args.test_type == 'extend':
        verify_list = np.loadtxt('../meta/voxceleb1_veri_test_extended.txt',
                                 str)
    else:
        raise ValueError('==> unknown test type.')

    verify_lb = np.array([int(i[0]) for i in verify_list])
    list1 = np.array([os.path.join(args.data_path, i[1]) for i in verify_list])
    list2 = np.array([os.path.join(args.data_path, i[2]) for i in verify_list])

    total_list = np.concatenate((list1, list2))
    unique_list = np.unique(total_list)
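    # Every utterance appears in many trial pairs, so features are extracted
    # once per unique file and the embeddings are looked up when scoring.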

    # ==================================
    #       Get Model
    # ==================================
    # construct the data generator.
    params = {
        #   'dim': (257, None, 1),
        'dim': (args.frame_num, args.hdim, args.wdim, 3),
        'nfft': 512,
        'spec_len': 250,
        'win_length': 400,
        'hop_length': 160,
        'n_classes': 5994,
        'sampling_rate': 16000,
        'normalize': True,
    }
    Dim = (args.frame_num, args.hdim, args.wdim, 3)

    network_eval = model.vggvox_resnet2d_icassp(input_dim=Dim,
                                                num_class=params['n_classes'],
                                                mode='eval',
                                                args=args)

    # ==> load the pre-trained model.
    if args.resume:
        # ==> load the weights, by name, from the checkpoint passed on
        # the command line.
        if os.path.isfile(args.resume):
            network_eval.load_weights(args.resume, by_name=True)
            result_path = set_result_path(args)
            print('==> successfully loading model {}.'.format(args.resume))
        else:
            raise IOError("==> no checkpoint found at '{}'".format(
                args.resume))
    else:
        raise ValueError('==> please specify a model to load.')

    print('==> start testing.')

    # Feature extraction has to be done sample by sample,
    # because the samples have different lengths.
    total_length = len(unique_list)
    feats, scores, labels = [], [], []
    for c, ID in enumerate(unique_list):
        if c % 50 == 0:
            print('Finished extracting features for {}/{} wavs.'.format(
                c, total_length))
        specs = ut.load_data(ID,
                             dim_w=args.wdim,
                             dim_h=args.hdim,
                             win_length=params['win_length'],
                             sr=params['sampling_rate'],
                             hop_length=params['hop_length'],
                             n_fft=params['nfft'],
                             spec_len=args.frame_num,
                             mode='eval')
        # specs = np.expand_dims(np.expand_dims(specs, 0), -1)
        specs = np.expand_dims(specs, 0)
        v = network_eval.predict(specs)
        feats += [v]

    feats = np.array(feats)

    # ==> compute the pair-wise similarity.
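    # (the inner product below acts as a cosine similarity, assuming the
    # network outputs L2-normalized embeddings)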
    for c, (p1, p2) in enumerate(zip(list1, list2)):
        ind1 = np.where(unique_list == p1)[0][0]
        ind2 = np.where(unique_list == p2)[0][0]

        v1 = feats[ind1, 0]
        v2 = feats[ind2, 0]

        scores += [np.sum(v1 * v2)]
        labels += [verify_lb[c]]
        print('score: {}, gt: {}'.format(scores[-1], verify_lb[c]))

    scores = np.array(scores)
    labels = np.array(labels)

    np.save(os.path.join(result_path, 'prediction_scores.npy'), scores)
    np.save(os.path.join(result_path, 'groundtruth_labels.npy'), labels)

    eer, thresh = toolkits.calculate_eer(labels, scores)
    print('==> model : {}, EER: {}'.format(args.resume, eer))
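
For reference, toolkits.calculate_eer is not shown in these examples. Below is a minimal sketch of an equal error rate computation over the saved scores and labels, assuming scikit-learn is available (the repo's actual implementation may differ):

import numpy as np
from sklearn.metrics import roc_curve

def calculate_eer_sketch(labels, scores):
    # The EER is the operating point where the false positive rate
    # equals the false negative rate (1 - tpr).
    fpr, tpr, thresholds = roc_curve(labels, scores, pos_label=1)
    fnr = 1.0 - tpr
    idx = np.nanargmin(np.abs(fnr - fpr))
    return (fpr[idx] + fnr[idx]) / 2.0, thresholds[idx]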
Example #2
def main():

    # gpu configuration
    toolkits.initialize_GPU(args)

    import model
    import generator
    import keras

    # ==================================
    #       Get Train/Val.
    # ==================================
    trnlist, trnlb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/vox2_train_wav.txt')
    vallist, vallb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/vox2_val_wav.txt')

    # construct the data generator.
    params = {
        'dim': (257, 250, 1),
        'mp_pooler': toolkits.set_mp(processes=args.multiprocess),
        'nfft': 512,
        'spec_len': 250,
        'win_length': 400,
        'hop_length': 160,
        'n_classes': 5994,
        'sampling_rate': 16000,
        'batch_size': args.batch_size,
        'shuffle': True,
        'normalize': True,
    }
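    # Note: with n_fft = 512 the spectrogram has 512 // 2 + 1 = 257 frequency
    # bins, and spec_len = 250 frames at hop_length = 160 covers
    # 250 * 160 / 16000 = 2.5 s of 16 kHz audio, hence 'dim': (257, 250, 1).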

    # Datasets
    partition = {'train': trnlist.flatten(), 'val': vallist.flatten()}
    labels = {'train': trnlb.flatten(), 'val': vallb.flatten()}

    # Generators
    trn_gen = generator.DataGenerator(partition['train'], labels['train'],
                                      **params)
    network = model.vggvox_resnet2d_icassp(input_dim=params['dim'],
                                           num_class=params['n_classes'],
                                           mode='train',
                                           args=args)

    # ==> load the pre-trained model, if a checkpoint is given.
    mgpu = len(keras.backend.tensorflow_backend._get_available_gpus())
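    # With a multi-GPU model, the single-GPU template model is nested
    # inside the parallel model as a layer, hence the layers[mgpu + 1]
    # indexing below.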
    if args.resume:
        if os.path.isfile(args.resume):
            if mgpu == 1:
                network.load_weights(args.resume)
            else:
                network.layers[mgpu + 1].load_weights(args.resume)
            print('==> successfully loading model {}.'.format(args.resume))
        else:
            print("==> no checkpoint found at '{}'".format(args.resume))

    network.summary()
    print('==> gpu: {}, training {} images, classes: 0-{} '
          'loss: {}, aggregation: {}, ohemlevel: {}'.format(
              args.gpu, len(partition['train']), np.max(labels['train']),
              args.loss, args.aggregation_mode, args.ohem_level))

    model_path, log_path = set_path(args)
    normal_lr = keras.callbacks.LearningRateScheduler(step_decay)
    tbcallbacks = keras.callbacks.TensorBoard(log_dir=log_path,
                                              histogram_freq=0,
                                              write_graph=True,
                                              write_images=False,
                                              update_freq=args.batch_size * 16)
    callbacks = [
        keras.callbacks.ModelCheckpoint(os.path.join(
            model_path, 'weights-{epoch:02d}-{acc:.3f}.h5'),
                                        monitor='loss',
                                        mode='min',
                                        save_best_only=True), normal_lr,
        tbcallbacks
    ]

    if args.ohem_level > 1:  # online hard negative mining will be used
        candidate_steps = int(len(partition['train']) // args.batch_size)
        iters_per_epoch = int(
            len(partition['train']) // (args.ohem_level * args.batch_size))

        ohem_generator = generator.OHEM_generator(
            network, trn_gen, candidate_steps, args.ohem_level,
            args.batch_size, params['dim'], params['n_classes'])

        _ = ohem_generator.next()  # warm up the generator before training

        network.fit_generator(generator.OHEM_generator(
            network, trn_gen, iters_per_epoch, args.ohem_level,
            args.batch_size, params['dim'], params['n_classes']),
                              steps_per_epoch=iters_per_epoch,
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1)

    else:
        network.fit_generator(trn_gen,
                              steps_per_epoch=int(
                                  len(partition['train']) // args.batch_size),
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1)
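
Both training examples schedule the learning rate with a step_decay function defined elsewhere in the repo. A minimal sketch of such a schedule for keras.callbacks.LearningRateScheduler (the initial rate and decay constants here are assumptions):

def step_decay(epoch, initial_lr=0.001, drop=0.1, epochs_per_drop=10):
    # multiply the learning rate by `drop` every `epochs_per_drop` epochs
    return initial_lr * (drop ** (epoch // epochs_per_drop))
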
Example #3
def main():

    # gpu configuration
    toolkits.initialize_GPU(args)

    import model
    import generator
    import keras  # needed below for callbacks and backend queries

    # ==================================
    #       Get Train/Val.
    # ==================================
    trnlist, trnlb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/voxlb2_train.txt')
    vallist, vallb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/voxlb2_val.txt')

    # construct the data generator.
    params = {
        'dim': (257, 250, 1),
        'mp_pooler': toolkits.set_mp(processes=args.multiprocess),
        'nfft': 512,
        'spec_len': 250,
        'win_length': 400,
        'hop_length': 160,
        'n_classes': 5994,
        'sampling_rate': 16000,
        'batch_size': args.batch_size,
        'shuffle': True,
        'normalize': True,
    }

    # Datasets
    # numpy's flatten() collapses each list/label array into a 1-D vector.
    partition = {'train': trnlist.flatten(), 'val': vallist.flatten()}
    # print("partition is: ",partition)
    labels = {'train': trnlb.flatten(), 'val': vallb.flatten()}

    # Generators
    # build the training data generator
    trn_gen = generator.DataGenerator(partition['train'], labels['train'],
                                      **params)
    # construct the model according to args
    network = model.vggvox_resnet2d_icassp(input_dim=params['dim'],
                                           num_class=params['n_classes'],
                                           mode='train',
                                           args=args)

    # ==> load the pre-trained model, if a checkpoint is given.
    mgpu = len(keras.backend.tensorflow_backend._get_available_gpus())
    if args.resume:
        if os.path.isfile(args.resume):
            if mgpu == 1:
                network.load_weights(args.resume)
            else:
                network.layers[mgpu + 1].load_weights(args.resume)
            print('==> successfully loading model {}.'.format(args.resume))
        else:
            print("==> no checkpoint found at '{}'".format(args.resume))

    network.summary()
    print('==> gpu: {}, training {} images, classes: 0-{} '
          'loss: {}, aggregation: {}, ohemlevel: {}'.format(
              args.gpu, len(partition['train']), np.max(labels['train']),
              args.loss, args.aggregation_mode, args.ohem_level))

    model_path, log_path = set_path(args)
    normal_lr = keras.callbacks.LearningRateScheduler(step_decay)
    '''
    Keras callbacks return information from a training algorithm while
    training is taking place: a callback is a set of functions applied at
    given stages of the training procedure, which can be used to get a view
    on internal states and statistics of the model during training.
    '''
    # tbcallbacks = keras.callbacks.TensorBoard(log_dir=log_path, histogram_freq=0,
    #                                           write_graph=True, write_images=False,
    #                                           update_freq=args.batch_size * 16)
    # `callbacks` must stay defined: the OHEM branch below still uses it.
    callbacks = [
        keras.callbacks.ModelCheckpoint(os.path.join(
            model_path, 'weights-{epoch:02d}-{acc:.3f}.h5'),
                                        monitor='loss',
                                        mode='min',
                                        save_best_only=True), normal_lr
    ]

    if args.ohem_level > 1:  # online hard negative mining will be used

        candidate_steps = int(len(partition['train']) // args.batch_size)
        iters_per_epoch = int(
            len(partition['train']) // (args.ohem_level * args.batch_size))

        ohem_generator = generator.OHEM_generator(
            network, trn_gen, candidate_steps, args.ohem_level,
            args.batch_size, params['dim'], params['n_classes'])

        _ = ohem_generator.next()  # warm up the generator before training

        network.fit_generator(generator.OHEM_generator(
            network, trn_gen, iters_per_epoch, args.ohem_level,
            args.batch_size, params['dim'], params['n_classes']),
                              steps_per_epoch=iters_per_epoch,
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1)

    else:
        print("steps_per_epoch=", int(len(partition['train'])), "   ",
              args.batch_size)
        print(trn_gen)
        print(network)
        print("epochs=", args.epochs, "--------")
        print("***************", args.batch_size)
        network.fit_generator(
            trn_gen,
            steps_per_epoch=2,  # debug value; normally int(len(partition['train']) // args.batch_size)
            epochs=args.epochs,
            max_queue_size=2,
            #   callbacks=callbacks,
            use_multiprocessing=True,
            workers=1,
            verbose=1)
        print('==> training finished.')