def main():
    # gpu configuration
    toolkits.initialize_GPU(args)

    import model
    # ==================================
    #       Get Train/Val.
    # ==================================
    print('==> calculating test({}) data lists...'.format(args.test_type))

    trnlist, trnlb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/voxlb2_train.txt')
    vallist, vallb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/voxlb2_val.txt')

    if args.test_type == 'normal':
        verify_list = np.loadtxt('../meta/voxceleb1_veri_test.txt', str)
    elif args.test_type == 'hard':
        verify_list = np.loadtxt('../meta/voxceleb1_veri_test_hard.txt', str)
    elif args.test_type == 'extend':
        verify_list = np.loadtxt('../meta/voxceleb1_veri_test_extended.txt', str)
    else:
        raise IOError('==> unknown test type.')

    verify_lb = np.array([int(i[0]) for i in verify_list])
    list1 = np.array([os.path.join(args.data_path, i[1]) for i in verify_list])
    list2 = np.array([os.path.join(args.data_path, i[2]) for i in verify_list])

    total_list = np.concatenate((list1, list2))
    unique_list = np.unique(total_list)

    # ==================================
    #       Get Model.
    # ==================================
    # construct the data generator.
    params = {
        # 'dim': (257, None, 1),
        'dim': (args.frame_num, args.hdim, args.wdim, 3),
        'nfft': 512,
        'spec_len': 250,
        'win_length': 400,
        'hop_length': 160,
        'n_classes': 5994,
        'sampling_rate': 16000,
        'normalize': True,
    }

    Dim = (args.frame_num, args.hdim, args.wdim, 3)
    network_eval = model.vggvox_resnet2d_icassp(input_dim=Dim,
                                                num_class=params['n_classes'],
                                                mode='eval', args=args)

    # ==> load the pre-trained model specified by args.resume.
    if args.resume:
        if os.path.isfile(args.resume):
            network_eval.load_weights(os.path.join(args.resume), by_name=True)
            result_path = set_result_path(args)
            print('==> successfully loading model {}.'.format(args.resume))
        else:
            raise IOError("==> no checkpoint found at '{}'".format(args.resume))
    else:
        raise IOError('==> please type in the model to load')

    print('==> start testing.')

    # Feature extraction has to be done sample-by-sample,
    # because each sample has a different length.
    total_length = len(unique_list)
    feats, scores, labels = [], [], []
    for c, ID in enumerate(unique_list):
        if c % 50 == 0:
            print('Finish extracting features for {}/{}th wav.'.format(c, total_length))
        specs = ut.load_data(ID, dim_w=args.wdim, dim_h=args.hdim,
                             win_length=params['win_length'], sr=params['sampling_rate'],
                             hop_length=params['hop_length'], n_fft=params['nfft'],
                             spec_len=args.frame_num, mode='eval')
        # specs = np.expand_dims(np.expand_dims(specs, 0), -1)
        specs = np.expand_dims(specs, 0)
        v = network_eval.predict(specs)
        feats += [v]

    feats = np.array(feats)

    # ==> compute the pair-wise similarity.
    for c, (p1, p2) in enumerate(zip(list1, list2)):
        ind1 = np.where(unique_list == p1)[0][0]
        ind2 = np.where(unique_list == p2)[0][0]

        v1 = feats[ind1, 0]
        v2 = feats[ind2, 0]

        scores += [np.sum(v1 * v2)]
        labels += [verify_lb[c]]
        print('scores : {}, gt : {}'.format(scores[-1], verify_lb[c]))

    scores = np.array(scores)
    labels = np.array(labels)

    np.save(os.path.join(result_path, 'prediction_scores.npy'), scores)
    np.save(os.path.join(result_path, 'groundtruth_labels.npy'), labels)

    eer, thresh = toolkits.calculate_eer(labels, scores)
    print('==> model : {}, EER: {}'.format(args.resume, eer))
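# The verification loop above ends with toolkits.calculate_eer(labels, scores).
# Below is a minimal sketch of such an EER computation -- an assumption about
# what that helper does, not the repo's actual implementation. It locates the
# point on the ROC curve where the false-acceptance rate equals the
# false-rejection rate, using scikit-learn and scipy.
import numpy as np
from sklearn.metrics import roc_curve
from scipy.optimize import brentq
from scipy.interpolate import interp1d


def calculate_eer_sketch(labels, scores):
    """Equal Error Rate: the operating point where FPR == FNR (= 1 - TPR)."""
    fpr, tpr, thresholds = roc_curve(labels, scores, pos_label=1)
    # solve fpr(t) = 1 - tpr(t) on the interpolated ROC curve
    eer = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.)
    thresh = float(interp1d(fpr, thresholds)(eer))
    return eer, thresh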
def main():
    # gpu configuration
    toolkits.initialize_GPU(args)

    import model
    import generator
    import keras

    # ==================================
    #       Get Train/Val.
    # ==================================
    trnlist, trnlb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/vox2_train_wav.txt')
    vallist, vallb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/vox2_val_wav.txt')

    # construct the data generator.
    params = {
        'dim': (257, 250, 1),
        'mp_pooler': toolkits.set_mp(processes=args.multiprocess),
        'nfft': 512,
        'spec_len': 250,
        'win_length': 400,
        'hop_length': 160,
        'n_classes': 5994,
        'sampling_rate': 16000,
        'batch_size': args.batch_size,
        'shuffle': True,
        'normalize': True,
    }

    # Datasets
    partition = {'train': trnlist.flatten(), 'val': vallist.flatten()}
    labels = {'train': trnlb.flatten(), 'val': vallb.flatten()}

    # Generators
    trn_gen = generator.DataGenerator(partition['train'], labels['train'], **params)
    network = model.vggvox_resnet2d_icassp(input_dim=params['dim'],
                                           num_class=params['n_classes'],
                                           mode='train', args=args)

    # ==> load the pre-trained model, if one was given.
    mgpu = len(keras.backend.tensorflow_backend._get_available_gpus())
    if args.resume:
        if os.path.isfile(args.resume):
            # on a single GPU the weights load directly; with multiple GPUs the
            # real model is wrapped, so load into the inner layer instead.
            if mgpu == 1:
                network.load_weights(os.path.join(args.resume))
            else:
                network.layers[mgpu + 1].load_weights(os.path.join(args.resume))
            print('==> successfully loading model {}.'.format(args.resume))
        else:
            print("==> no checkpoint found at '{}'".format(args.resume))

    print(network.summary())
    print('==> gpu: {}, training {} images, classes: 0-{}, '
          'loss: {}, aggregation: {}, ohemlevel: {}'.format(
              args.gpu, len(partition['train']), np.max(labels['train']),
              args.loss, args.aggregation_mode, args.ohem_level))

    model_path, log_path = set_path(args)
    normal_lr = keras.callbacks.LearningRateScheduler(step_decay)
    tbcallbacks = keras.callbacks.TensorBoard(log_dir=log_path, histogram_freq=0,
                                              write_graph=True, write_images=False,
                                              update_freq=args.batch_size * 16)
    callbacks = [
        keras.callbacks.ModelCheckpoint(os.path.join(model_path, 'weights-{epoch:02d}-{acc:.3f}.h5'),
                                        monitor='loss', mode='min', save_best_only=True),
        normal_lr, tbcallbacks
    ]

    if args.ohem_level > 1:  # online hard example mining will be used
        candidate_steps = int(len(partition['train']) // args.batch_size)
        iters_per_epoch = int(len(partition['train']) // (args.ohem_level * args.batch_size))

        ohem_generator = generator.OHEM_generator(network, trn_gen, candidate_steps,
                                                  args.ohem_level, args.batch_size,
                                                  params['dim'], params['n_classes'])

        A = ohem_generator.next()  # warm up the generator before fitting

        network.fit_generator(generator.OHEM_generator(network, trn_gen, iters_per_epoch,
                                                       args.ohem_level, args.batch_size,
                                                       params['dim'], params['n_classes']),
                              steps_per_epoch=iters_per_epoch,
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1)
    else:
        network.fit_generator(trn_gen,
                              steps_per_epoch=int(len(partition['train']) // args.batch_size),
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1)
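# The training script above schedules the learning rate with
# keras.callbacks.LearningRateScheduler(step_decay); the step_decay function
# itself is defined elsewhere in the script. A minimal sketch of a typical
# step schedule follows -- the initial rate, drop factor, and drop interval
# are illustrative assumptions, not the repo's actual values.
import math


def step_decay(epoch):
    """Multiply the learning rate by `drop` every `epochs_per_drop` epochs."""
    initial_lr = 1e-3    # assumed starting rate
    drop = 0.5           # assumed multiplicative decay factor
    epochs_per_drop = 10  # assumed decay interval
    return initial_lr * math.pow(drop, math.floor(epoch / epochs_per_drop))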
def main():
    # gpu configuration
    toolkits.initialize_GPU(args)

    import model
    import generator
    import keras  # needed below for the callbacks and the GPU count

    # ==================================
    #       Get Train/Val.
    # ==================================
    trnlist, trnlb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/voxlb2_train.txt')
    vallist, vallb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/voxlb2_val.txt')

    # construct the data generator.
    params = {
        'dim': (257, 250, 1),
        'mp_pooler': toolkits.set_mp(processes=args.multiprocess),
        'nfft': 512,
        'spec_len': 250,
        'win_length': 400,
        'hop_length': 160,
        'n_classes': 5994,
        'sampling_rate': 16000,
        'batch_size': args.batch_size,
        'shuffle': True,
        'normalize': True,
    }

    # Datasets: numpy's flatten() turns the loaded list arrays into plain
    # 1-D vectors of paths and labels.
    partition = {'train': trnlist.flatten(), 'val': vallist.flatten()}
    labels = {'train': trnlb.flatten(), 'val': vallb.flatten()}

    # Generators
    trn_gen = generator.DataGenerator(partition['train'], labels['train'], **params)

    # create the model according to args
    network = model.vggvox_resnet2d_icassp(input_dim=params['dim'],
                                           num_class=params['n_classes'],
                                           mode='train', args=args)

    # ==> load the pre-trained model, if one was given.
    mgpu = len(keras.backend.tensorflow_backend._get_available_gpus())
    if args.resume:
        if os.path.isfile(args.resume):
            if mgpu == 1:
                network.load_weights(os.path.join(args.resume))
            else:
                network.layers[mgpu + 1].load_weights(os.path.join(args.resume))
            print('==> successfully loading model {}.'.format(args.resume))
        else:
            print("==> no checkpoint found at '{}'".format(args.resume))

    print(network.summary())
    print('==> gpu: {}, training {} images, classes: 0-{}, '
          'loss: {}, aggregation: {}, ohemlevel: {}'.format(
              args.gpu, len(partition['train']), np.max(labels['train']),
              args.loss, args.aggregation_mode, args.ohem_level))

    model_path, log_path = set_path(args)

    # Keras callbacks are functions applied at given stages of the training
    # procedure; they expose internal state and statistics of the model while
    # it trains. In this debug variant only the learning-rate scheduler is
    # kept; the TensorBoard and ModelCheckpoint callbacks are disabled.
    normal_lr = keras.callbacks.LearningRateScheduler(step_decay)
    callbacks = [normal_lr]

    if args.ohem_level > 1:  # online hard example mining will be used
        candidate_steps = int(len(partition['train']) // args.batch_size)
        iters_per_epoch = int(len(partition['train']) // (args.ohem_level * args.batch_size))

        ohem_generator = generator.OHEM_generator(network, trn_gen, candidate_steps,
                                                  args.ohem_level, args.batch_size,
                                                  params['dim'], params['n_classes'])

        A = ohem_generator.next()  # warm up the generator before fitting

        network.fit_generator(generator.OHEM_generator(network, trn_gen, iters_per_epoch,
                                                       args.ohem_level, args.batch_size,
                                                       params['dim'], params['n_classes']),
                              steps_per_epoch=iters_per_epoch,
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1)
    else:
        print("steps_per_epoch=", int(len(partition['train'])), " ", args.batch_size)
        print(trn_gen)
        print(network)
        print("epochs=", args.epochs)
        # debug settings: steps_per_epoch is pinned to 2 for a quick smoke
        # test instead of the full len(partition['train']) // args.batch_size.
        network.fit_generator(trn_gen,
                              steps_per_epoch=2,  # int(len(partition['train']) // args.batch_size)
                              epochs=args.epochs,
                              max_queue_size=2,
                              callbacks=callbacks,
                              use_multiprocessing=True,
                              workers=1,  # with multiprocessing enabled, consider workers > 1
                              verbose=1)
    print("end!")
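# Both training variants stream (spectrogram, label) batches into
# fit_generator through generator.DataGenerator. A minimal sketch of the
# interface such a generator must provide is below, built on
# keras.utils.Sequence. The repo's real class additionally handles the
# multiprocessing pool ('mp_pooler') and spectrogram normalization, and
# load_spectrogram here is a hypothetical helper standing in for its actual
# wav -> magnitude-spectrogram pipeline, so treat this as an assumption about
# the shape of the API rather than the real implementation.
import numpy as np
import keras


class DataGeneratorSketch(keras.utils.Sequence):
    def __init__(self, list_IDs, labels, dim=(257, 250, 1), batch_size=16,
                 n_classes=5994, shuffle=True, **kwargs):
        self.list_IDs = list_IDs      # paths to wav files
        self.labels = labels          # integer speaker IDs, aligned with list_IDs
        self.dim = dim
        self.batch_size = batch_size
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        # number of batches per epoch
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        # indices of the samples that make up this batch
        ids = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        X = np.empty((self.batch_size,) + tuple(self.dim))
        y = np.empty((self.batch_size,), dtype=int)
        for i, j in enumerate(ids):
            X[i] = load_spectrogram(self.list_IDs[j], self.dim)  # hypothetical helper
            y[i] = self.labels[j]
        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

    def on_epoch_end(self):
        # reshuffle the sample order between epochs
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)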