        i_1 = i_1[:, ::downsampling, :]
        i_2 = i_2[:, ::downsampling, :]

        if whitening:
            i_1, i_2 = whiten(i_1), whiten(i_2)

        return [i_1, i_2], labels

    return preprocessor_


whiten_downsample = preprocessor(downsampling, whitening=True)


###################
# Create datasets #
###################
train = LibriSpeechDataset(training_set, n_seconds)
valid = LibriSpeechDataset(validation_set, n_seconds, stochastic=False)

train_generator = (whiten_downsample(batch) for batch in train.yield_verification_batches(batchsize))
valid_generator = (whiten_downsample(batch) for batch in valid.yield_verification_batches(batchsize))


################
# Define model #
################
encoder = get_baseline_convolutional_encoder(model_n_filters, model_embedding_dimension)
siamese = build_siamese_net(encoder, (input_length, 1))
opt = Adam(clipnorm=1.)
siamese.compile(loss=contrastive_loss, optimizer=opt, metrics=['accuracy'])
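# NOTE (illustration, not part of the original code): `whiten` is called above
# without its definition appearing in this fragment. A minimal sketch of what a
# per-fragment whitening step could look like, assuming it removes the DC offset
# and rescales each fragment to a fixed RMS energy; the default RMS constant and
# the function name are assumptions, not the repository's implementation.
import numpy as np

def whiten_sketch(batch, desired_rms=0.038021, epsilon=1e-8):
    """Hypothetical whitening: zero-mean each fragment along the time axis,
    then rescale it to `desired_rms`. `batch` has shape (batch, samples, 1)."""
    batch = batch - batch.mean(axis=1, keepdims=True)
    rms = np.sqrt((batch ** 2).mean(axis=1, keepdims=True)) + epsilon
    return batch * (desired_rms / rms)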
num_evaluation_tasks = 500
n_shot_classification = 1
k_way_classification = 5


#################
# Training Loop #
#################
for fragment_length in n_seconds:
    print('*' * 23)
    print('***** {:.1f} seconds *****'.format(fragment_length))
    print('*' * 23)

    input_length = int(LIBRISPEECH_SAMPLING_RATE * fragment_length / downsampling)

    # Create datasets
    train = LibriSpeechDataset(training_set, fragment_length, pad=True)
    valid = LibriSpeechDataset(validation_set, fragment_length, stochastic=False, pad=True)

    batch_preprocessor = BatchPreProcessor('siamese', preprocess_instances(downsampling))
    train_generator = (batch_preprocessor(batch) for batch in train.yield_verification_batches(batchsize))
    valid_generator = (batch_preprocessor(batch) for batch in valid.yield_verification_batches(batchsize))

    for repeat in range(n_repeats):
        # Define model
        encoder = get_baseline_convolutional_encoder(model_n_filters, model_embedding_dimension, dropout=model_dropout)
        siamese = build_siamese_net(encoder, (input_length, 1), distance_metric='uniform_euclidean')
        opt = Adam(clipnorm=1.)
        siamese.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

        # Train
        param_str = 'siamese__nseconds_{}__filters_{}__embed_{}__drop_{}__r_{}'.format(fragment_length, model_n_filters,
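# NOTE (illustration, not part of the original code): `build_siamese_net` is used
# above without its definition. A minimal sketch of how a siamese verification
# network with a Euclidean-style distance head is commonly assembled in Keras;
# the layer choices and function name below are assumptions, not the repository's
# exact architecture.
from tensorflow.keras import Input, Model
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense, Lambda

def build_siamese_net_sketch(encoder, input_shape):
    """Two weight-sharing encoders, an element-wise squared-difference layer,
    and a sigmoid 'same speaker?' output trained with binary crossentropy."""
    input_1 = Input(shape=input_shape)
    input_2 = Input(shape=input_shape)

    embedded_1 = encoder(input_1)
    embedded_2 = encoder(input_2)

    # Element-wise squared difference between the two embeddings.
    distance = Lambda(lambda t: K.square(t[0] - t[1]))([embedded_1, embedded_2])
    output = Dense(1, activation='sigmoid')(distance)

    return Model(inputs=[input_1, input_2], outputs=output)

# Example usage (shapes assumed): build_siamese_net_sketch(encoder, (input_length, 1))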
val_metrics = ['pooled_eer', 'accuracy', 'micro_f1']

# Derived parameters
input_length = int(window_size / downsampling)

param_str = 'siamese__filters_{}__embed_{}__drop_{}__pad={}'.format(
    filters, embedding_dimension, dropout, pad)


###################
# Create datasets #
###################
# TODO replace with Kaldi
data_dir = '/home/vano/wrkdir/datasets/LibriSpeech'
# data_dir = '/home/vano/wrkdir/projects_data/sre_2019/toy_dataset'
train = LibriSpeechDataset(data_dir, training_set, n_seconds, pad=pad)
valid = LibriSpeechDataset(data_dir, validation_set, n_seconds, stochastic=False, pad=pad)

batch_preprocessor = BatchPreProcessor('siamese', preprocess_instances(downsampling))
train_generator = (batch_preprocessor(batch) for batch in train.yield_verification_batches(batchsize))
valid_generator = (batch_preprocessor(batch) for batch in valid.yield_verification_batches(batchsize))


################
# Define model #
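# NOTE (illustration, not part of the original code): the validation metrics above
# include a pooled equal error rate. As a generic reference, EER is commonly
# computed from pooled verification scores with scikit-learn's ROC utilities as
# sketched below; this is not the repository's metric code.
import numpy as np
from sklearn.metrics import roc_curve

def pooled_eer_sketch(y_true, y_score):
    """Equal error rate: the operating point where the false acceptance rate
    equals the false rejection rate, computed on pooled trial scores."""
    fpr, tpr, _ = roc_curve(y_true, y_score)
    fnr = 1.0 - tpr
    idx = int(np.nanargmin(np.abs(fnr - fpr)))
    return float((fpr[idx] + fnr[idx]) / 2.0)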
downsampling = 4
n_seconds = 3
validation_set = 'dev-clean'
siamese_model_path = PATH + '/models/n_seconds/siamese__nseconds_3.0__filters_128__embed_64__drop_0.0__r_0.hdf5'
classifier_model_path = PATH + '/models/baseline_classifier.hdf5'
k_way = list(range(2, 21, 1))
n_shot = [1, 5]
num_tasks = 1000
distance = 'dot_product'
results_path = PATH + '/logs/k-way_n-shot_accuracy_{}_{}.csv'.format(
    validation_set, distance)


###################
# Create datasets #
###################
valid = LibriSpeechDataset(validation_set, n_seconds, stochastic=False)
batch_preprocessor = BatchPreProcessor('siamese', preprocess_instances(downsampling))


#############
# Main Loop #
#############
siamese = load_model(siamese_model_path)
classifier = load_model(classifier_model_path)

with open(results_path, 'w') as f:
    print('method,n_correct,n_tasks,n_shot,k_way', file=f)

results = []
for k in k_way:
    for n in n_shot:
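# NOTE (illustration, not part of the original code): the body of the k-way,
# n-shot evaluation loop is truncated above. A hedged sketch of how a single
# trial could be scored for the siamese model with a dot-product distance;
# the helper name, argument shapes, and label handling are assumptions.
import numpy as np

def n_shot_trial_correct_sketch(encoder, support, support_labels, query, query_label):
    """Score one n-shot, k-way trial: embed the support set and the query,
    take dot-product similarities, and predict the label of the closest
    support example. Returns 1 if the prediction matches `query_label`."""
    support_emb = encoder.predict(support)   # shape (n_shot * k_way, embed_dim)
    query_emb = encoder.predict(query)       # shape (1, embed_dim)

    scores = support_emb @ query_emb.T       # shape (n_shot * k_way, 1)
    prediction = support_labels[int(np.argmax(scores))]
    return int(prediction == query_label)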
pad = True
num_epochs = 50
evaluate_every_n_batches = 500
num_evaluation_tasks = 500
n_shot_classification = 1
k_way_classification = 5

# Derived parameters
input_length = int(LIBRISPEECH_SAMPLING_RATE * n_seconds / downsampling)

param_str = 'classifier__filters_{}__embed_{}__drop_{}__pad={}'.format(
    filters, embedding_dimension, dropout, pad)


###################
# Create datasets #
###################
train = LibriSpeechDataset(training_set, n_seconds)
valid = LibriSpeechDataset(validation_set, n_seconds, stochastic=False)

# Map speaker IDs to the range 0 - (train.num_classes() - 1)
unique_speakers = sorted(train.df['speaker_id'].unique())
speaker_id_mapping = {
    unique_speakers[i]: i for i in range(train.num_classes())
}


class BatchedSequence(Sequence):
    """Convenience class"""
    def __init__(self, sequence, batch_preprocessor, batchsize):
        self.sequence = sequence
        self.batch_preprocessor = batch_preprocessor
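# NOTE (illustration, not part of the original code): the BatchedSequence
# definition above is truncated mid-constructor. A minimal sketch of how such a
# Keras Sequence wrapper is commonly completed; the method bodies, the assumption
# that each dataset item is an (instance, label) pair, and the class name are
# hypothetical, not the original implementation.
import numpy as np
from tensorflow.keras.utils import Sequence

class BatchedSequenceSketch(Sequence):
    """Wraps an indexable dataset so Keras can consume preprocessed,
    fixed-size batches."""
    def __init__(self, sequence, batch_preprocessor, batchsize):
        self.sequence = sequence
        self.batch_preprocessor = batch_preprocessor
        self.batchsize = batchsize

    def __len__(self):
        return int(np.ceil(len(self.sequence) / self.batchsize))

    def __getitem__(self, index):
        start = index * self.batchsize
        stop = min((index + 1) * self.batchsize, len(self.sequence))
        samples = [self.sequence[i] for i in range(start, stop)]
        instances, labels = zip(*samples)
        return self.batch_preprocessor((np.stack(instances), np.asarray(labels)))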
        remaining_seconds = seconds_per_speaker - df_1['seconds'].sum()
        df_remaining = df_[df_['seconds'] < remaining_seconds].head(1)
        new_df.append(pd.concat([df_1, df_remaining]))

    new_df = pd.concat(new_df)
    new_dataset.df = new_df
    return new_dataset


#################
# Training Loop #
#################
train = LibriSpeechDataset(training_set, n_seconds, pad=pad)
valid = LibriSpeechDataset(validation_set, n_seconds, stochastic=False, pad=pad)

batch_preprocessor = BatchPreProcessor('siamese', preprocess_instances(downsampling))
valid_generator = (batch_preprocessor(batch) for batch in valid.yield_verification_batches(batchsize))

n_speakers = np.ceil(np.logspace(np.log(min_speakers), np.log(max_speakers), n_points, base=np.e)).astype(int)

for n in n_speakers:
    minutes_per_speaker = min_speakers*max_minutes*1./n
    seconds_per_speaker = minutes_per_speaker*60.

    print('*' * 35)
    print('{} speakers, {:.2f} minutes per speaker'.format(n, min_speakers*max_minutes*1./n))
    print('*' * 35)

    reduced_train = create_reduced_dataset(train, n, seconds_per_speaker)
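# NOTE (illustration, not part of the original code): only the tail of
# create_reduced_dataset appears above. A hedged sketch of the overall shape such
# a reduction could take, assuming the dataset exposes a DataFrame with
# 'speaker_id' and 'seconds' columns as the fragment suggests; the speaker
# sampling and budgeting details of the original function may differ.
import copy
import pandas as pd

def create_reduced_dataset_sketch(dataset, n_speakers, seconds_per_speaker):
    """Keep only `n_speakers` speakers and roughly `seconds_per_speaker`
    seconds of audio per speaker. Illustrative only."""
    new_dataset = copy.copy(dataset)
    keep_speakers = dataset.df['speaker_id'].drop_duplicates().sample(n_speakers)

    new_df = []
    for speaker_id in keep_speakers:
        df_ = dataset.df[dataset.df['speaker_id'] == speaker_id]
        # Take whole files until the per-speaker budget is (almost) used up...
        df_1 = df_[df_['seconds'].cumsum() <= seconds_per_speaker]
        # ...then, as in the fragment above, top up with at most one more short file.
        remaining_seconds = seconds_per_speaker - df_1['seconds'].sum()
        df_remaining = df_[df_['seconds'] < remaining_seconds].head(1)
        new_df.append(pd.concat([df_1, df_remaining]))

    new_dataset.df = pd.concat(new_df)
    return new_dataset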
    def setUpClass(cls):
        cls.dataset = LibriSpeechDataset('dev-clean', 3)
num_evaluation_tasks = 500
n_shot_classification = 1
k_way_classification = 5


#################
# Training Loop #
#################
for fragment_length in n_seconds:
    print('*' * 23)
    print('***** {:.1f} seconds *****'.format(fragment_length))
    print('*' * 23)

    input_length = int(LIBRISPEECH_SAMPLING_RATE * fragment_length / downsampling)

    # Create datasets
    train = LibriSpeechDataset(training_set, fragment_length, pad=True)
    valid = LibriSpeechDataset(validation_set, fragment_length, stochastic=False, pad=True)

    batch_preprocessor = BatchPreProcessor('siamese', preprocess_instances(downsampling))
    train_generator = (
        batch_preprocessor(batch) for batch in train.yield_verification_batches(batchsize))
    valid_generator = (
        batch_preprocessor(batch) for batch in valid.yield_verification_batches(batchsize))

    for repeat in range(n_repeats):