def _init(self, model_name): if model_name is not None: self.model_definition = model_name else: self.model_definition = self.config["model"] if self.model_definition: self.feature_mode = self.model_definition.split('_')[0] else: self.feature_mode = None self.max_files_per_class = self.config.get( "training/max_files_per_class", None) self.dataset = None if self.experiment_name: self._model_dir = os.path.join( os.path.expanduser(self.config["paths/model_dir"]), self.experiment_name) self._experiment_config_path = os.path.join( self._model_dir, self.config['experiment/config_path']) # if these don't exist, we're not actually running anything if self.model_definition and self.feature_mode: utils.create_directory(self._model_dir)
def _init_cross_validation(self, test_set):
    """Prepare per-fold output directories for one cross-validation run.

    Parameters
    ----------
    test_set : str
        Name of the held-out set; becomes a subdirectory of the model dir.
    """
    cv_dir = os.path.join(self._model_dir, test_set)
    self._cv_model_dir = cv_dir
    self._params_dir = os.path.join(
        cv_dir, self.config["experiment/params_dir"])
    self._training_loss_path = os.path.join(
        cv_dir, self.config['experiment/training_loss'])

    if os.path.exists(cv_dir):
        logger.warning("Cleaning old experiment: {}".format(cv_dir))
    # aka if DO the clean, recreate.
    utils.create_directory(cv_dir, recreate=(not self.skip_cleaning))
    utils.create_directory(self._params_dir)
def _init_cross_validation(self, test_set):
    """Point attributes at (and create) the output dirs for one fold.

    Parameters
    ----------
    test_set : str
        Name of the held-out set; becomes a subdirectory of the model dir.
    """
    config = self.config
    self._cv_model_dir = os.path.join(self._model_dir, test_set)
    self._params_dir = os.path.join(
        self._cv_model_dir, config["experiment/params_dir"])
    self._training_loss_path = os.path.join(
        self._cv_model_dir, config['experiment/training_loss'])

    if os.path.exists(self._cv_model_dir):
        logger.warning(
            "Cleaning old experiment: {}".format(self._cv_model_dir))
    # aka if DO the clean, recreate.
    recreate = not self.skip_cleaning
    utils.create_directory(self._cv_model_dir, recreate=recreate)
    utils.create_directory(self._params_dir)
def test_create_directory(workspace):
    """create_directory should make a missing dir and be idempotent."""
    target = os.path.join(workspace, "oh_hello")
    assert not os.path.exists(target)
    # First call creates the directory and reports success...
    assert utils.create_directory(target)
    # ...and calling again on an existing directory still succeeds.
    assert utils.create_directory(target)
def cqt_from_dataset(dataset, write_dir, cqt_params=None, audio_params=None,
                     harmonic_params=None, num_cpus=-1, verbose=50,
                     skip_existing=True):
    """Compute CQT representation over audio files referenced by a dataframe,
    and return a new dataframe also containing a column referencing the
    cqt files.

    Parameters
    ----------
    dataset : hcnn.data.Dataset
        Dataset containing references to the audio files.

    write_dir : str
        Directory to write to.

    cqt_params : dict, default=None
        Parameters to use for CQT computation.

    audio_params : dict, default=None
        Parameters to use for loading the audio file.

    harmonic_params : dict, default=None
        Parameters to use on top of `cqt_params` for the harmonic cqt.

    num_cpus : int, default=-1
        Number of parallel threads to use for computation.

    verbose : int
        Passed to cqt_many; for "Parallel"

    skip_existing : bool
        If files exist, don't try to extract them.

    Returns
    -------
    updated_dataset : data.dataset.Dataset
        Dataset updated with parameters to the outputed features.
    """
    utils.create_directory(write_dir)

    ####
    ## TODO IF skip_existing, try to reload the dataset with features
    ## And modify it instead of replacing it.

    def features_path_for_audio(audio_path):
        # One .npz per audio file, named after the audio file's basename.
        return os.path.join(write_dir, utils.filebase(audio_path) + ".npz")

    audio_paths = dataset.to_df()["audio_file"].tolist()
    cqt_paths = [features_path_for_audio(x) for x in audio_paths]

    failed_files = cqt_many(audio_paths, cqt_paths, cqt_params, audio_params,
                            harmonic_params, num_cpus, verbose, skip_existing)
    # Fix: only warn when something actually failed; a warning-level
    # "0 files failed to extract." on every clean run is noise.
    if failed_files:
        logger.warning("{} files failed to extract.".format(len(failed_files)))

    feats_df = dataset.to_df()
    # Record the cqt path only for files that were successfully created;
    # build the whole column at once instead of per-cell .loc writes.
    cqt_column = []
    for path in cqt_paths:
        if os.path.exists(path):
            cqt_column.append(path)
        else:
            logger.warning("CQT Not successfully created: {}".format(path))
            cqt_column.append(None)
    feats_df['cqt'] = pd.Series(cqt_column, index=feats_df.index)

    return DS.Dataset(feats_df, dataset.split)