def __init__(self, name, experiments_path, results_path, global_configuration, experiment_configuration, seed):
    """Prepare the directories, the configuration state and the pipeline of an experiment.

    If '<experiments_path>/<name>_configuration.yaml' already exists, the saved
    configuration is read back and the trainer/evaluator are restored through
    PipelineFactory.load. Otherwise the configuration is a deep copy of
    `global_configuration` in which every key that is also present in
    `experiment_configuration` is overridden; it is written to that file and the
    trainer/evaluator are created through PipelineFactory.build.

    Args:
        name: Experiment name, used as prefix of the configuration file name.
        experiments_path: Directory holding the experiment configuration file.
        results_path: Directory handed to the pipeline factory for the results.
        global_configuration: Default configuration mapping.
        experiment_configuration: Experiment specific overrides. Keys that are
            absent from the global configuration are ignored.
        seed: Stored on the instance; not otherwise used by the constructor.
    """
    self._name = name
    self._experiments_path = experiments_path
    self._results_path = results_path
    self._global_configuration = global_configuration
    self._experiment_configuration = experiment_configuration
    self._seed = seed

    # Create the experiments and results directories if they don't exist.
    # makedirs(exist_ok=True) also creates missing parent directories and
    # doesn't fail if the directory appears between the check and the creation.
    for label, directory in (('experiments', experiments_path), ('results', results_path)):
        if os.path.isdir(directory):
            ConsoleLogger.status('{} directory already created at path: {}'.format(label.capitalize(), directory))
        else:
            ConsoleLogger.status('Creating {} directory at path: {}'.format(label, directory))
            os.makedirs(directory, exist_ok=True)

    experiments_configuration_path = os.path.join(experiments_path, name + '_configuration.yaml')

    if os.path.isfile(experiments_configuration_path):
        # Read back the saved configuration state. Both values are superseded
        # right below by the ones PipelineFactory.load returns.
        with open(experiments_configuration_path, 'r') as file:
            self._configuration = yaml.load(file, Loader=yaml.FullLoader)
        self._device_configuration = DeviceConfiguration.load_from_configuration(self._configuration)

        self._trainer, self._evaluator, self._configuration, self._device_configuration = \
            PipelineFactory.load(self._experiments_path, self._name, self._results_path)
    else:
        self._device_configuration = DeviceConfiguration.load_from_configuration(global_configuration)

        # Create a new configuration state from the default and the experiment specific aspects
        self._configuration = copy.deepcopy(self._global_configuration)
        for experiment_key, experiment_value in experiment_configuration.items():
            if experiment_key in self._configuration:
                self._configuration[experiment_key] = experiment_value

        # Save the configuration of the experiments
        with open(experiments_configuration_path, 'w') as file:
            yaml.dump(self._configuration, file)

        self._trainer, self._evaluator = PipelineFactory.build(
            self._configuration, self._device_configuration,
            self._experiments_path, self._name, self._results_path)
def retreive_losses_values(experiment_path, experiment):
    """Gather the merged training losses and perplexities of one experiment.

    The configuration file and the checkpoint files of `experiment.name` are
    searched in `experiment_path`; the device configuration is loaded from the
    configuration file and passed, together with the checkpoint files, to
    CheckpointUtils.merge_experiment_losses.

    Returns:
        A tuple (train_res_losses, train_res_perplexities, number of checkpoint files).

    Raises:
        ValueError: If no configuration file or no checkpoint file was found.
    """
    name = experiment.name

    ConsoleLogger.status(
        "Searching configuration and checkpoints of experiment '{}' at path '{}'".format(name, experiment_path))
    found_configuration, found_checkpoints = \
        CheckpointUtils.search_configuration_and_checkpoints_files(experiment_path, name)

    # Both a configuration file and at least one checkpoint file are mandatory
    if not found_configuration:
        raise ValueError('No configuration file found with name: {}'.format(name))
    checkpoints_count = len(found_checkpoints)
    if checkpoints_count == 0:
        raise ValueError('No checkpoint files found with name: {}'.format(name))

    # Read the configuration state saved alongside the checkpoints
    configuration_path = os.sep.join((experiment_path, found_configuration))
    ConsoleLogger.status("Loading the configuration file '{}'".format(configuration_path))
    with open(configuration_path, 'r') as stream:
        loaded_configuration = yaml.load(stream, Loader=yaml.FullLoader)

    # The device configuration is derived from the configuration state
    device_configuration = DeviceConfiguration.load_from_configuration(loaded_configuration)

    ConsoleLogger.status("Merge {} checkpoint losses of experiment '{}'".format(checkpoints_count, name))
    losses, perplexities = CheckpointUtils.merge_experiment_losses(
        experiment_path, found_checkpoints, device_configuration)

    return losses, perplexities, len(found_checkpoints)
def test_global_conditioning(self):
    """Check the size of the global conditioning, with and without expansion.

    One batch is taken from the training loader of a VCTKSpeechStream built
    from '../../configurations/vctk_features.yaml' (resolved against the
    current working directory). GlobalConditioning.compute is expected to
    return a tensor of size [1, 128, 1] when `expand` is False and of size
    [1, 128, 7680] when `expand` is True.
    """
    # An explicit Loader is mandatory with PyYAML >= 6 and matches the other
    # configuration loading code of the project
    with open('../../configurations/vctk_features.yaml', 'r') as configuration_file:
        configuration = yaml.load(configuration_file, Loader=yaml.FullLoader)

    device_configuration = DeviceConfiguration.load_from_configuration(configuration)
    data_stream = VCTKSpeechStream(configuration, device_configuration.gpu_ids, device_configuration.use_cuda)

    (x_enc, x_dec, speaker_id, _, _) = next(iter(data_stream.training_loader))

    ConsoleLogger.status('x_enc.size(): {}'.format(x_enc.size()))
    ConsoleLogger.status('x_dec.size(): {}'.format(x_dec.size()))

    # Drop the trailing dimension of the decoder input before conditioning
    x = x_dec.squeeze(-1)

    global_conditioning = GlobalConditioning.compute(
        speaker_dic=data_stream.speaker_dic,
        speaker_ids=speaker_id,
        x_one_hot=x,
        expand=False
    )
    self.assertEqual(global_conditioning.size(), torch.Size([1, 128, 1]))
    ConsoleLogger.success('global_conditioning.size(): {}'.format(global_conditioning.size()))

    expanded_global_conditioning = GlobalConditioning.compute(
        speaker_dic=data_stream.speaker_dic,
        speaker_ids=speaker_id,
        x_one_hot=x,
        expand=True
    )
    self.assertEqual(expanded_global_conditioning.size(), torch.Size([1, 128, 7680]))
    ConsoleLogger.success('expanded_global_conditioning.size(): {}'.format(expanded_global_conditioning.size()))
def load(experiments_path, experiment_name, results_path, data_path='../data'):
    """Restore the trainer and the evaluator of an existing experiment.

    The configuration file of the experiment is loaded and, when checkpoint
    files exist, the model and optimizer states of the latest one are restored.
    When no checkpoint is available (or when the search for the configuration
    and checkpoint files failed), a fresh pipeline is built from the
    configuration through PipelineFactory.build.

    Args:
        experiments_path: Directory holding the configuration and checkpoint files.
        experiment_name: Name of the experiment to restore.
        results_path: Directory passed to the evaluator / PipelineFactory.build.
        data_path: Currently unused, see the NOTE(review) on the data stream below.

    Returns:
        A tuple (trainer, evaluator, configuration, device_configuration).

    Raises:
        NotImplementedError: If the 'decoder_type' or 'trainer_type' of the
            configuration isn't supported.
    """
    try:
        configuration_file, checkpoint_files = PipelineFactory.load_configuration_and_checkpoints(
            experiments_path, experiment_name)
    except Exception as error:
        ConsoleLogger.error(
            'Failed to load existing configuration. Building a new model...')
        ConsoleLogger.error('Cause: {}'.format(error))
        # Without these two names the fallback announced above would crash with
        # a NameError. NOTE(review): assumes the '<name>_configuration.yaml'
        # naming convention of the experiment configuration files - confirm.
        configuration_file = experiment_name + '_configuration.yaml'
        checkpoint_files = []

    # Load the configuration file
    ConsoleLogger.status('Loading the configuration file')
    with open(experiments_path + os.sep + configuration_file, 'r') as file:
        configuration = yaml.load(file, Loader=yaml.FullLoader)
    device_configuration = DeviceConfiguration.load_from_configuration(configuration)
    device = device_configuration.device

    # Nothing to restore: build a new pipeline from the configuration
    if not checkpoint_files:
        trainer, evaluator = PipelineFactory.build(
            configuration, device_configuration, experiments_path, experiment_name, results_path)
        return trainer, evaluator, configuration, device_configuration

    latest_checkpoint_file, latest_epoch = CheckpointUtils.search_latest_checkpoint_file(checkpoint_files)

    # Update the epoch number to begin with for the future training
    configuration['start_epoch'] = latest_epoch
    # NOTE(review): overrides whatever 'num_epochs' the saved configuration holds
    configuration['num_epochs'] = 60

    # Load the checkpoint file
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints
    checkpoint_path = experiments_path + os.sep + latest_checkpoint_file
    ConsoleLogger.status("Loading the checkpoint file '{}'".format(checkpoint_path))
    checkpoint = torch.load(checkpoint_path, map_location=device)

    # Load the data stream
    # NOTE(review): the dataset location is hard-coded and `data_path` is
    # ignored; kept as is because existing callers rely on this location.
    ConsoleLogger.status('Loading the data stream')
    data_stream = VCTKFeaturesStream('/atlas/u/xuyilun/vctk', configuration,
        device_configuration.gpu_ids, device_configuration.use_cuda)

    def load_state_dicts(model, checkpoint, model_name, optimizer_name):
        # Load the state dict from the checkpoint to the model
        model.load_state_dict(checkpoint[model_name])

        # Create an Adam optimizer using the model parameters
        optimizer = optim.Adam(model.parameters())

        # Load the state dict from the checkpoint to the optimizer
        optimizer.load_state_dict(checkpoint[optimizer_name])

        # Map the optimizer memory into the specified device
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device)

        return model, optimizer

    # Create the model matching the decoder type
    decoder_type = configuration['decoder_type']
    if decoder_type == 'deconvolutional':
        vqvae_model = ConvolutionalVQVAE(configuration, device)
    elif decoder_type == 'wavenet':
        vqvae_model = WaveNetVQVAE(configuration, data_stream.speaker_dic, device)
    else:
        raise NotImplementedError(
            "Decoder type '{}' isn't implemented for now".format(decoder_type))

    # Map the model to the specified device, then restore its state. The
    # evaluator is created before the state dicts are loaded, as it was before.
    vqvae_model = vqvae_model.to(device)
    evaluator = Evaluator(device, vqvae_model, data_stream, configuration,
        results_path, experiment_name)
    vqvae_model, vqvae_optimizer = load_state_dicts(vqvae_model, checkpoint, 'model', 'optimizer')

    # Temporary backward compatibility
    if 'trainer_type' not in configuration:
        ConsoleLogger.error(
            "trainer_type was not found in configuration file. Use 'convolutional' by default.")
        configuration['trainer_type'] = 'convolutional'

    if configuration['trainer_type'] != 'convolutional':
        raise NotImplementedError(
            "Trainer type '{}' isn't implemented for now".format(configuration['trainer_type']))

    trainer = ConvolutionalTrainer(device, data_stream, configuration,
        experiments_path, experiment_name, model=vqvae_model, optimizer=vqvae_optimizer)

    return trainer, evaluator, configuration, device_configuration
'alignment_subset': args.alignment_subset, 'compute_clustering_metrics': args.compute_clustering_metrics, 'compute_groundtruth_average_phonemes_number': args.compute_groundtruth_average_phonemes_number, 'plot_clustering_metrics_evolution': args.plot_clustering_metrics_evolution, 'check_clustering_metrics_stability_over_seeds': args.check_clustering_metrics_stability_over_seeds, 'plot_gradient_stats': args.plot_gradient_stats } # If specified, print the summary of the model using the CPU device if args.summary: configuration = load_configuration(args.summary) ConsoleLogger.status('Printing the summary of the model...') device_configuration = DeviceConfiguration.load_from_configuration( configuration) model = PipelineFactory.build(configuration, device_configuration, default_experiments_path, default_experiment_name, default_results_path) print(model) sys.exit(0) if args.plot_experiments_losses: LossesPlotter().plot_training_losses( Experiments.load(args.experiments_configuration_path).experiments, args.experiments_path) sys.exit(0) if args.eval: Experiments.load(args.experiments_configuration_path).eval() sys.exit(0)