def train(self):
    """Train the model of this experiment and log progress to the console.

    The work itself is delegated to ``self._trainer.train()``; the
    experiment name (``self._name``) is reported before and after the run.
    """
    start_message = "Running the experiment called '{}'".format(self._name)
    ConsoleLogger.status(start_message)
    ConsoleLogger.status('Begins to train the model')

    self._trainer.train()

    end_message = "Succeed to runned the experiment called '{}'".format(
        self._name)
    ConsoleLogger.success(end_message)
def evaluate(self, evaluation_options):
    """Evaluate the model of this experiment and log progress to the console.

    Args:
        evaluation_options: forwarded unchanged to
            ``self._evaluator.evaluate``.
    """
    start_message = "Running the experiment called '{}'".format(self._name)
    ConsoleLogger.status(start_message)
    ConsoleLogger.status('Begins to evaluate the model')

    self._evaluator.evaluate(evaluation_options)

    end_message = "Succeed to runned the experiment called '{}'".format(
        self._name)
    ConsoleLogger.success(end_message)
def test_global_conditioning(self):
    """Check the tensor sizes returned by ``GlobalConditioning.compute``.

    Loads the ``vctk_features.yaml`` configuration, takes the first batch of
    the training loader of a ``VCTKSpeechStream`` and asserts the size of the
    global conditioning both without expansion (``[1, 128, 1]``) and with
    expansion (``[1, 128, 7680]``).
    """
    with open('../../configurations/vctk_features.yaml', 'r') as configuration_file:
        # safe_load is used because yaml.load() without an explicit Loader
        # is rejected by PyYAML >= 6 and can build arbitrary objects on
        # older versions.
        configuration = yaml.safe_load(configuration_file)

    device_configuration = DeviceConfiguration.load_from_configuration(configuration)
    data_stream = VCTKSpeechStream(
        configuration,
        device_configuration.gpu_ids,
        device_configuration.use_cuda
    )

    # Only the first training batch is needed to check the shapes.
    (x_enc, x_dec, speaker_id, _, _) = next(iter(data_stream.training_loader))

    ConsoleLogger.status('x_enc.size(): {}'.format(x_enc.size()))
    ConsoleLogger.status('x_dec.size(): {}'.format(x_dec.size()))

    x = x_dec.squeeze(-1)

    global_conditioning = GlobalConditioning.compute(
        speaker_dic=data_stream.speaker_dic,
        speaker_ids=speaker_id,
        x_one_hot=x,
        expand=False
    )
    self.assertEqual(global_conditioning.size(), torch.Size([1, 128, 1]))
    ConsoleLogger.success('global_conditioning.size(): {}'.format(global_conditioning.size()))

    expanded_global_conditioning = GlobalConditioning.compute(
        speaker_dic=data_stream.speaker_dic,
        speaker_ids=speaker_id,
        x_one_hot=x,
        expand=True
    )
    self.assertEqual(expanded_global_conditioning.size(), torch.Size([1, 128, 7680]))
    ConsoleLogger.success('expanded_global_conditioning.size(): {}'.format(expanded_global_conditioning.size()))
def _plot_merged_all_losses_type(self, all_results_paths, all_experiments_names,
    all_train_losses, all_train_perplexities, all_latest_epochs,
    colormap_name='tab20'):
    """Save one figure per loss type, merging the curves of all experiments.

    For every loss name other than ``'loss'``, the smoothed curve of each
    experiment that has this loss is drawn on a single figure, which is
    saved as ``<loss_name>.png`` under the first results path.

    Args:
        all_results_paths: results directories; only the first one is used
            as the output location.
        all_experiments_names: experiment names, used as curve labels.
        all_train_losses: one mapping ``loss name -> values`` per experiment.
        all_train_perplexities: not used by this method.
        all_latest_epochs: number of epochs of each experiment.
        colormap_name: colormap name passed to ``_get_colors_from_cmap``.

    Raises:
        ValueError: if the experiments do not all share the same number of
            epochs.
    """
    latest_epoch = all_latest_epochs[0]
    if any(epoch != latest_epoch for epoch in all_latest_epochs[1:]):
        raise ValueError(
            'All experiments must have the same number of epochs to merge them'
        )

    results_path = all_results_paths[0]

    # loss name -> list of (experiment name, smoothed curve). The name is
    # stored next to each curve so that the label stays correct even when
    # some experiments do not have a given loss.
    curves_by_loss = dict()
    for i, train_losses in enumerate(all_train_losses):
        for loss_name in train_losses.keys():
            if loss_name == 'loss':
                continue
            curves_by_loss.setdefault(loss_name, list()).append(
                (all_experiments_names[i],
                 self._smooth_curve(train_losses[loss_name])))

    for loss_name, labelled_curves in curves_by_loss.items():
        n_colors = len(labelled_curves)
        colors = self._get_colors_from_cmap(colormap_name, n_colors)

        curve_names = [name for name, _ in labelled_curves]
        all_train_loss_smooth = np.asarray(
            [curve for _, curve in labelled_curves])
        # One row per epoch so that each epoch can be drawn as a band.
        all_train_loss_smooth = np.reshape(
            all_train_loss_smooth,
            (n_colors, latest_epoch,
             all_train_loss_smooth.shape[1] // latest_epoch))

        fig, ax = plt.subplots(figsize=(8, 8))
        for j, train_loss_smooth in enumerate(all_train_loss_smooth):
            ax = self._plot_fill_between(ax, colors[j], train_loss_smooth,
                curve_names[j])
        ax = self._configure_ax(ax,
            title='Smoothed ' + loss_name.replace('_', ' '),
            xlabel='Epochs', ylabel='Loss', legend=True)

        output_plot_path = results_path + os.sep + loss_name + '.png'
        fig.savefig(output_plot_path)
        plt.close(fig)

        ConsoleLogger.success(
            "Saved figure at path '{}'".format(output_plot_path))
def _plot_loss_and_perplexity_figures(self, all_results_paths,
    all_experiments_names, all_train_losses, all_train_perplexities,
    all_latest_epochs, n_colors, colors):
    """Save one two-panel figure (loss and perplexity) per experiment.

    For each experiment, the smoothed ``'loss'`` values and the smoothed
    perplexities are reshaped to ``(latest_epoch, values // latest_epoch)``
    and drawn side by side. The figure is written to
    ``<results_path>/<experiment_name>_loss-and-perplexity.png``.

    Args:
        all_results_paths: output directory of each experiment.
        all_experiments_names: name of each experiment.
        all_train_losses: one mapping per experiment; its ``'loss'`` entry
            is plotted.
        all_train_perplexities: perplexity values of each experiment.
        all_latest_epochs: number of epochs of each experiment.
        n_colors: not used by this method.
        colors: one color per experiment, indexed like the experiments.
    """
    for index, experiment_name in enumerate(all_experiments_names):
        output_plot_path = (all_results_paths[index] + os.sep
            + experiment_name + '_loss-and-perplexity.png')

        loss_curve = self._smooth_curve(all_train_losses[index]['loss'])
        perplexity_curve = self._smooth_curve(all_train_perplexities[index])
        epochs_number = all_latest_epochs[index]

        loss_curve = np.asarray(loss_curve)
        perplexity_curve = np.asarray(perplexity_curve)
        loss_curve = np.reshape(
            loss_curve,
            (epochs_number, loss_curve.shape[0] // epochs_number))
        perplexity_curve = np.reshape(
            perplexity_curve,
            (epochs_number, perplexity_curve.shape[0] // epochs_number))

        # (subplot position, curve, title, y label) for the two panels.
        panels = (
            (1, loss_curve, 'Smoothed loss', 'Loss'),
            (2, perplexity_curve, 'Smoothed average codebook usage',
             'Perplexity'),
        )

        fig = plt.figure(figsize=(16, 8))
        for position, curve, title, ylabel in panels:
            ax = fig.add_subplot(1, 2, position)
            ax = self._plot_fill_between(ax, colors[index], curve,
                all_experiments_names[index])
            ax = self._configure_ax(ax, title=title, xlabel='Epochs',
                ylabel=ylabel, legend=False)

        fig.savefig(output_plot_path)
        plt.close(fig)

        ConsoleLogger.success(
            "Saved figure at path '{}'".format(output_plot_path))
def compute_groundtruth_average_phonemes_number(self):
    """Log the average number of distinct phonemes per groundtruth alignment.

    Reads ``vctk_groundtruth_alignments.pickle`` from ``self._results_path``,
    counts the unique values of every alignment found under the
    ``'extended_alignment_dataset'`` key and logs the mean of those counts,
    rounded to two decimals.
    """
    alignments_path = (self._results_path + os.sep
        + 'vctk_groundtruth_alignments.pickle')
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use it on alignment files produced by this project.
    with open(alignments_path, 'rb') as f:
        alignments_dic = pickle.load(f)

    extended_alignment_dataset = alignments_dic['extended_alignment_dataset']

    phonemes_number = [
        len(np.unique(alignment))
        for _, alignment in extended_alignment_dataset
    ]

    # The mean is rounded, not the list of counts: round() on a list raises
    # TypeError.
    average_phonemes_number = round(float(np.mean(phonemes_number)), 2)

    ConsoleLogger.success(
        'The average number of phonemes per alignment for {} alignments is: {}'
        .format(len(extended_alignment_dataset), average_phonemes_number))
def export_to_features(self, vctk_path, configuration):
    """Export the training and validation loaders as pickled feature files.

    Creates ``<vctk_path>/<configuration['features_path']>`` with ``train``
    and ``val`` sub-directories when they are missing, then writes one
    ``<index>.pickle`` file per loader item into the matching directory.
    A failure while exporting one part is logged and does not prevent the
    other part from being exported.

    Args:
        vctk_path: root directory of the VCTK dataset.
        configuration: mapping read for ``features_path``,
            ``input_features_type``, ``output_features_type``,
            ``sampling_rate``, ``input_features_filters``,
            ``output_features_filters``, ``input_features_dim``,
            ``augment_output_features`` and ``export_one_hot_features``.

    Raises:
        ValueError: if ``vctk_path`` is not an existing directory.
    """
    if not os.path.isdir(vctk_path):
        raise ValueError(
            "VCTK dataset not found at path '{}'".format(vctk_path))

    def ensure_directory(path, label):
        # Create `path` if it is missing and report which case applied.
        if not os.path.isdir(path):
            ConsoleLogger.status(
                'Creating {} directory at path: {}'.format(label, path))
            os.mkdir(path)
        else:
            ConsoleLogger.status(
                '{} directory already created at path: {}'.format(
                    label.capitalize(), path))

    features_path = vctk_path + os.sep + configuration['features_path']
    ensure_directory(features_path, 'features')

    train_features_path = features_path + os.sep + 'train'
    ensure_directory(train_features_path, 'train features')

    val_features_path = features_path + os.sep + 'val'
    ensure_directory(val_features_path, 'val features')

    def process(loader, output_dir, input_features_name, output_features_name,
        rate, input_filters_number, output_filters_number, input_target_shape,
        augment_output_features, export_one_hot_features):
        # Write one pickle per loader item into `output_dir`. When the loader
        # fails, iteration is restarted (up to `attempts` times) with the
        # file index resumed from where the failure happened.
        initial_index = 0
        attempts = 10
        current_attempt = 0
        total_length = len(loader)

        while current_attempt < attempts:
            # Bound before the try so that both handlers can always use them.
            i = initial_index
            bar = None
            try:
                bar = tqdm(loader, initial=initial_index)
                for data in bar:
                    (preprocessed_audio, one_hot, speaker_id, quantized,
                        wav_filename, sampling_rate, shifting_time,
                        random_starting_index, preprocessed_length,
                        top_db) = data

                    output_path = output_dir + os.sep + str(i) + '.pickle'
                    if os.path.isfile(output_path):
                        if os.path.getsize(output_path) == 0:
                            # An empty file is treated as a failed previous
                            # write and is computed again.
                            bar.set_description(
                                '{} already exists but is empty. Computing it again...'
                                .format(output_path))
                            os.remove(output_path)
                        else:
                            bar.set_description(
                                '{} already exists'.format(output_path))
                            i += 1
                            continue

                    input_features = SpeechFeatures.features_from_name(
                        name=input_features_name,
                        signal=preprocessed_audio,
                        rate=rate,
                        filters_number=input_filters_number)

                    if input_features.shape[0] != input_target_shape[0] \
                            or input_features.shape[1] != input_target_shape[1]:
                        ConsoleLogger.warn(
                            "Raw features number {} with invalid dimension {} will not be saved. Target shape: {}"
                            .format(i, input_features.shape,
                                input_target_shape))
                        i += 1
                        continue

                    output_features = SpeechFeatures.features_from_name(
                        name=output_features_name,
                        signal=preprocessed_audio,
                        rate=rate,
                        filters_number=output_filters_number,
                        augmented=augment_output_features)

                    # TODO: add an option in configuration to save quantized/one_hot or not
                    output = {
                        'preprocessed_audio': preprocessed_audio,
                        'wav_filename': wav_filename,
                        'input_features': input_features,
                        'one_hot': one_hot if export_one_hot_features else np.array([]),
                        'quantized': np.array([]),
                        'speaker_id': speaker_id,
                        'output_features': output_features,
                        'shifting_time': shifting_time,
                        'random_starting_index': random_starting_index,
                        'preprocessed_length': preprocessed_length,
                        'sampling_rate': sampling_rate,
                        'top_db': top_db
                    }

                    with open(output_path, 'wb') as file:
                        pickle.dump(output, file)

                    bar.set_description('{} saved'.format(output_path))

                    i += 1

                    if i == total_length:
                        bar.update(total_length)
                        break

                bar.close()
                break

            except KeyboardInterrupt:
                if bar is not None:
                    bar.close()
                ConsoleLogger.warn(
                    'Keyboard interrupt detected. Leaving the function...')
                return
            except Exception:
                # Exception (not a bare except) so that SystemExit and
                # GeneratorExit are not swallowed by the retry loop.
                if bar is not None:
                    bar.close()
                error_message = 'An error occured in the data loader at {}/{}. Current attempt: {}/{}'.format(
                    output_dir, i, current_attempt + 1, attempts)
                self._logger.exception(error_message)
                ConsoleLogger.error(error_message)
                initial_index = i
                current_attempt += 1
                continue

    def export_part(part_name, loader_attribute, output_dir):
        # Export one dataset part. The loader and configuration lookups stay
        # inside the try so that a failure is reported without stopping the
        # export of the other part.
        try:
            ConsoleLogger.status('Processing {} part'.format(part_name))
            process(
                loader=getattr(self, loader_attribute),
                output_dir=output_dir,
                input_features_name=configuration['input_features_type'],
                output_features_name=configuration['output_features_type'],
                rate=configuration['sampling_rate'],
                input_filters_number=configuration['input_features_filters'],
                output_filters_number=configuration['output_features_filters'],
                input_target_shape=(configuration['input_features_dim'],
                    configuration['input_features_filters'] * 3),
                augment_output_features=configuration[
                    'augment_output_features'],
                export_one_hot_features=configuration[
                    'export_one_hot_features'])
            ConsoleLogger.success(
                '{} part processed'.format(part_name.capitalize()))
        except Exception:
            failure_message = \
                'An error occured during {} features generation'.format(
                    part_name)
            # Keep the traceback in the log; the console only gets the
            # summary.
            self._logger.exception(failure_message)
            ConsoleLogger.error(failure_message)

    export_part('training', '_training_loader', train_features_path)
    export_part('validation', '_validation_loader', val_features_path)
sys.exit(0) if args.fetch: Experiments.load(args.experiments_configuration_path).fetch() sys.exit(0) if args.export_to_features: configuration = load_configuration(default_configuration_path) configuration = update_configuration_from_experiments( args.experiments_configuration_path, configuration) device_configuration = DeviceConfiguration.load_from_configuration( configuration) data_stream = VCTKSpeechStream(configuration, device_configuration.gpu_ids, device_configuration.use_cuda) data_stream.export_to_features(default_dataset_path, configuration) ConsoleLogger.success( "VCTK exported to a new features dataset at: '{}'".format( default_dataset_path + os.sep + configuration['features_path'])) sys.exit(0) if args.evaluate: Experiments.load( args.experiments_configuration_path).evaluate(evaluation_options) ConsoleLogger.success('All evaluating experiments done') sys.exit(0) if args.compute_dataset_stats: configuration = load_configuration(default_configuration_path) configuration = update_configuration_from_experiments( args.experiments_configuration_path, configuration) device_configuration = DeviceConfiguration.load_from_configuration( configuration)
# Plot the silence duration gap of every audio sample, with the mean gap
# drawn as a red horizontal line and added to the y ticks.
fig, ax = plt.subplots()
ax.plot(np.arange(N), sil_duration_gaps)
ax.axhline(y=mean_sil_duration_gaps, xmin=0.0, xmax=1.0, color='r')
ax.set_title(
    'Silence duration gap between montreal alignments and\nlibrosa loading with sil thresh at 20db'
)
ax.set_xlabel('Number of audio samples')
ax.set_ylabel('Time (s)')
yt = np.append(ax.get_yticks(), mean_sil_duration_gaps)
ax.set_yticks(yt)
ax.set_ylim(bottom=0)
fig.savefig('../results/sil_duration_gaps.png')
plt.close(fig)

ConsoleLogger.success(
    'mean sil duration gap: {}'.format(mean_sil_duration_gaps))

# Save the raw statistics next to the figure.
sil_duration_gap_stats = {
    'sil_duration_gaps': sil_duration_gaps,
    'audio_filenames': audio_filenames,
    'original_shifting_times': original_shifting_times,
    'beginning_trimmed_times': beginning_trimmed_times,
    'detected_sil_durations': detected_sil_durations,
    'mean_sil_duration_gaps': mean_sil_duration_gaps
}
with open('../results/sil_duration_gap_stats.pickle', 'wb') as file:
    pickle.dump(sil_duration_gap_stats, file)
def compute_clustering_metrics(self):
    """Compare empirical alignments with groundtruth and save the scores.

    Loads the groundtruth and empirical alignment pickles from
    ``self._results_path``, keeps the groundtruth alignments that have the
    expected length and the empirical alignments whose utterance key was
    kept, then computes the adjusted rand score, the adjusted mutual info
    score and the normalized mutual info score on the concatenated
    alignments. Each score is logged and saved as a ``.npy`` file prefixed
    with ``self._experiment_name``.
    """
    results_prefix = self._results_path + os.sep

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use it on alignment files produced by this project.
    with open(results_prefix + 'vctk_groundtruth_alignments.pickle',
            'rb') as f:
        groundtruth_alignments_dic = pickle.load(f)

    with open(results_prefix + self._experiment_name
            + '_vctk_empirical_alignments.pickle', 'rb') as f:
        empirical_alignments_dic = pickle.load(f)

    # The (utterance key, alignment) pairs are iterated as stored. Wrapping
    # them in np.array() builds an inhomogeneous array, which recent NumPy
    # versions reject with ValueError.
    groundtruth_alignments = groundtruth_alignments_dic[
        'extended_alignment_dataset']
    possible_phonemes = list(groundtruth_alignments_dic['possible_phonemes'])
    empirical_alignments = empirical_alignments_dic['all_alignments']
    phonemes_indices = {
        phoneme: index for index, phoneme in enumerate(possible_phonemes)
    }

    ConsoleLogger.status('#{} possible phonemes: {}'.format(
        len(possible_phonemes), possible_phonemes))
    ConsoleLogger.status('# of raw groundtruth alignments: {}'.format(
        len(groundtruth_alignments)))
    ConsoleLogger.status('# of raw empirical alignments: {}'.format(
        len(empirical_alignments)))

    groundtruth_utterance_keys = set()
    final_groundtruth_alignments = list()
    final_empirical_alignments = list()

    alignment_length = ((self._configuration['length']
        / self._configuration['sampling_rate']) * 100) / 2

    for (utterence_key, alignment) in groundtruth_alignments:
        if len(alignment) != alignment_length:  # FIXME
            ConsoleLogger.error(
                'len(alignment) != alignment_length: {}'.format(
                    len(alignment)))
            continue

        groundtruth_utterance_keys.add(utterence_key)
        final_groundtruth_alignments.append(
            [phonemes_indices[phoneme] for phoneme in alignment])

    for (utterence_key, alignment) in empirical_alignments:
        if utterence_key in groundtruth_utterance_keys:
            final_empirical_alignments.append(alignment)

    final_groundtruth_alignments = np.asarray(final_groundtruth_alignments)
    final_empirical_alignments = np.asarray(final_empirical_alignments)

    ConsoleLogger.status('Groundtruth alignments shape: {}'.format(
        final_groundtruth_alignments.shape))
    ConsoleLogger.status('Empirical alignments shape: {}'.format(
        final_empirical_alignments.shape))

    # Slicing keeps the sample logging valid with fewer than two alignments.
    ConsoleLogger.status('Groundtruth alignments samples: {}'.format(
        list(final_groundtruth_alignments[:2])))
    ConsoleLogger.status('Empirical alignments samples: {}'.format(
        list(final_empirical_alignments[:2])))

    concatenated_groundtruth_alignments = np.concatenate(
        final_groundtruth_alignments)
    concatenated_empirical_alignments = np.concatenate(
        final_empirical_alignments)

    ConsoleLogger.status(
        'Concatenated groundtruth alignments shape: {}'.format(
            concatenated_groundtruth_alignments.shape))
    ConsoleLogger.status(
        'Concatenated empirical alignments shape: {}'.format(
            concatenated_empirical_alignments.shape))

    adjusted_rand_score = sklearn.metrics.adjusted_rand_score(
        concatenated_groundtruth_alignments,
        concatenated_empirical_alignments)
    adjusted_mutual_info_score = sklearn.metrics.adjusted_mutual_info_score(
        concatenated_groundtruth_alignments,
        concatenated_empirical_alignments)
    normalized_mutual_info_score = sklearn.metrics.normalized_mutual_info_score(
        concatenated_groundtruth_alignments,
        concatenated_empirical_alignments)

    ConsoleLogger.success(
        'Adjusted rand score: {}'.format(adjusted_rand_score))
    ConsoleLogger.success('Adjusted mututal info score: {}'.format(
        adjusted_mutual_info_score))
    ConsoleLogger.success(
        'Normalized adjusted mututal info score: {}'.format(
            normalized_mutual_info_score))

    # (file name suffix, score) for each saved metric.
    scores_to_save = (
        ('_adjusted_rand_score.npy', adjusted_rand_score),
        ('_adjusted_mutual_info_score.npy', adjusted_mutual_info_score),
        ('_normalized_mutual_info_score.npy', normalized_mutual_info_score),
    )
    for file_suffix, score in scores_to_save:
        with open(results_prefix + self._experiment_name + file_suffix,
                'wb') as f:
            np.save(f, score)

    ConsoleLogger.success(
        'All scores from cluestering metrics were successfully saved')
def fetch(self):
    """Run the fetch step of this experiment and log progress to the console.

    The work itself is delegated to ``self._trainer.fetch()``; the
    experiment name (``self._name``) is reported before and after the run.
    """
    start_message = "Running the experiment called '{}'".format(self._name)
    ConsoleLogger.status(start_message)
    ConsoleLogger.status('Begins to eval the model and save data')

    self._trainer.fetch()

    end_message = "Succeed to runned the experiment called '{}'".format(
        self._name)
    ConsoleLogger.success(end_message)
def _plot_merged_all_losses_figures(self, all_results_paths,
    all_experiments_names, all_train_losses, all_train_perplexities,
    all_latest_epochs, colormap_name='tab20'):
    """Save one figure per experiment with all of its losses merged.

    Every loss of an experiment is smoothed and drawn on the same axes,
    labelled with its loss name. The figure is saved as
    ``<experiment_name>_merged-losses.png`` under the first results path.

    Args:
        all_results_paths: results directories; only the first one is used
            as the output location.
        all_experiments_names: name of each experiment.
        all_train_losses: one mapping ``loss name -> values`` per experiment.
        all_train_perplexities: not used by this method.
        all_latest_epochs: number of epochs of each experiment.
        colormap_name: colormap name passed to ``_get_colors_from_cmap``.

    Raises:
        ValueError: if the experiments do not all share the same number of
            epochs.
    """
    latest_epoch = all_latest_epochs[0]
    if any(epoch != latest_epoch for epoch in all_latest_epochs[1:]):
        raise ValueError(
            'All experiments must have the same number of epochs to merge them'
        )

    results_path = all_results_paths[0]

    # First pass: smooth every loss of every experiment.
    smoothed_per_experiment = list()
    for train_losses in all_train_losses:
        loss_names = list(train_losses.keys())
        loss_curves = [
            self._smooth_curve(train_losses[name]) for name in loss_names
        ]
        smoothed_per_experiment.append((loss_curves, loss_names))

    # Second pass: one figure per experiment.
    for index, (loss_curves, loss_names) in enumerate(smoothed_per_experiment):
        n_colors = len(all_train_losses[index])
        colors = self._get_colors_from_cmap(colormap_name, n_colors)

        stacked_curves = np.asarray(loss_curves)
        stacked_curves = np.reshape(
            stacked_curves,
            (n_colors, latest_epoch,
             stacked_curves.shape[1] // latest_epoch))

        fig, ax = plt.subplots(figsize=(8, 8))
        for curve_index, curve in enumerate(stacked_curves):
            ax = self._plot_fill_between(ax, colors[curve_index], curve,
                loss_names[curve_index])

        experiment_name = all_experiments_names[index]
        ax = self._configure_ax(ax,
            title='Smoothed losses of ' + experiment_name,
            xlabel='Epochs', ylabel='Loss', legend=True)

        output_plot_path = (results_path + os.sep + experiment_name
            + '_merged-losses.png')
        fig.savefig(output_plot_path)
        plt.close(fig)

        ConsoleLogger.success(
            "Saved figure at path '{}'".format(output_plot_path))
def _plot_merged_losses_and_perplexities_figure(
    self, all_results_paths, all_experiments_names, all_train_losses,
    all_train_perplexities, all_latest_epochs, n_colors, colors):
    """Save one two-panel figure merging the curves of all experiments.

    The left panel shows the smoothed ``'loss'`` of every experiment and
    the right panel the smoothed perplexities, one curve per experiment.
    The figure is saved as ``merged-loss-and-perplexity.png`` under the
    first results path.

    Args:
        all_results_paths: results directories; only the first one is used
            as the output location.
        all_experiments_names: experiment names, used as curve labels.
        all_train_losses: one mapping per experiment; its ``'loss'`` entry
            is plotted.
        all_train_perplexities: perplexity values of each experiment.
        all_latest_epochs: number of epochs of each experiment.
        n_colors: first dimension used when reshaping the stacked curves.
        colors: one color per experiment, indexed like the experiments.

    Raises:
        ValueError: if the experiments do not all share the same number of
            epochs.
    """
    latest_epoch = all_latest_epochs[0]
    if any(epoch != latest_epoch for epoch in all_latest_epochs[1:]):
        raise ValueError(
            'All experiments must have the same number of epochs to merge them'
        )

    experiment_name = 'merged-loss-and-perplexity'
    output_plot_path = (all_results_paths[0] + os.sep + experiment_name
        + '.png')

    loss_curves = list()
    perplexity_curves = list()
    for index, train_perplexities in enumerate(all_train_perplexities):
        loss_curves.append(
            self._smooth_curve(all_train_losses[index]['loss']))
        perplexity_curves.append(self._smooth_curve(train_perplexities))

    loss_curves = np.asarray(loss_curves)
    perplexity_curves = np.asarray(perplexity_curves)

    loss_curves = np.reshape(
        loss_curves,
        (n_colors, latest_epoch, loss_curves.shape[1] // latest_epoch))
    perplexity_curves = np.reshape(
        perplexity_curves,
        (n_colors, latest_epoch,
         perplexity_curves.shape[1] // latest_epoch))

    # (subplot position, stacked curves, title, y label) for the two panels.
    panels = (
        (1, loss_curves, 'Smoothed loss', 'Loss'),
        (2, perplexity_curves, 'Smoothed average codebook usage',
         'Perplexity'),
    )

    fig = plt.figure(figsize=(16, 8))
    for position, curves, title, ylabel in panels:
        ax = fig.add_subplot(1, 2, position)
        for index, curve in enumerate(curves):
            ax = self._plot_fill_between(ax, colors[index], curve,
                all_experiments_names[index])
        ax = self._configure_ax(ax, title=title, xlabel='Epochs',
            ylabel=ylabel, legend=True)

    fig.savefig(output_plot_path)
    plt.close(fig)

    ConsoleLogger.success(
        "Saved figure at path '{}'".format(output_plot_path))