import os

from tables import *
import config
import utils
from spectrogram import Spectrogram

# All spectrogram geometry and input locations are read from the project
# configuration once, at import time.
conf = config.get_config()

winsize = int(conf.get('Spectrogram', 'WindowSize'))
# Number of frames per track: track length in seconds times the sample rate.
nframes = (int(conf.get('Tracks', 'LengthInSeconds')) *
           int(conf.get('Tracks', 'SampleRate')))
stepsize = int(conf.get('Spectrogram', 'StepSize'))
fftres = int(conf.get('Spectrogram', 'FFTResolution'))

# `os` is imported explicitly above; relying on the star import from
# `tables` to provide it is fragile.
audio_folder = os.path.expanduser(conf.get('Input', 'AudioFolder'))
# The genre count is the number of sub-directories in the audio folder.
numberofgenres = len(utils.list_subdirs(audio_folder))

# NOTE(review): the argument order used here is (window size, frame count,
# step size) -- confirm it against the signature of Spectrogram.wins.
wins = Spectrogram.wins(winsize, nframes, stepsize)
bins = Spectrogram.bins(fftres)
# Shape of the per-track spectrogram cell stored in the HDF5 table.
shape = Spectrogram.shape(wins, bins)


class Track(IsDescription):
    """Description of a track in HDF5.

    Each row stores the identifying metadata of one track together with
    its spectrogram, whose cell shape is the module-level ``shape``
    computed from the configuration.
    """
    # 32-bit integer identifier of the track.
    idnumber = Int32Col()
    # Fixed-width string columns (the argument is the item size).
    name = StringCol(64)
    path = StringCol(512)
    genre = StringCol(32)
    # Alternative representation that was considered: one boolean per genre.
    # target = BoolCol(shape=(numberofgenres,))
    # 8-bit integer target (presumably the genre index -- TODO confirm).
    target = Int8Col()
    # Float32 array of the configured spectrogram shape, zero-filled by
    # default.
    spectrogram = Float32Col(dflt=0.0, shape=shape)
def __init__(self, path, which_set, feature="spectrogram", space="conv2d",
             axes=('b', 0, 1, 'c'), preprocess=False, seconds=None,
             window_size=None, window_type=None, step_size=None,
             tw_window_size=None, tw_window_type=None, tw_step_size=None,
             fft_resolution=None, seed=None, n_folds=4, run_n=0,
             verbose=False, print_params=True):
    """Set up the dataset parameters and derive the per-track layout.

    Every argument and every local computed below is stored as an
    instance attribute of the same name (see the ``locals()`` update at
    the end), so the local names in this method are part of the object's
    interface and must not be renamed.

    Parameters
    ----------
    path : str
        Dataset location; run through ``string_utils.preprocess`` and
        then handed to ``self.tracks_and_genres``.
    which_set
        Stored as an attribute; not otherwise used in this constructor.
    feature : str
        One of ``"spectrogram"``, ``"inv_spectrogram"``,
        ``"texture_window"`` or ``"signal"``.  ``"signal"`` forces the
        space to ``"vector"``; ``"inv_spectrogram"`` combined with the
        ``"vector"`` space falls back to ``"spectrogram"``.
    space : str
        ``"conv2d"`` or ``"vector"``; selects the view converter.
    axes : tuple
        Axis order passed to ``DefaultViewConverter``.
    seconds
        Passed to ``self.tracks_and_genres``; afterwards replaced by the
        ``seconds`` value of the first loaded track.
    window_size, step_size
        Passed to ``Spectrogram.wins``.
    tw_window_size, tw_step_size
        Passed to ``TextureWindow.wins`` for the ``"texture_window"``
        feature.
    fft_resolution
        Passed to ``Spectrogram.bins``.
    preprocess, window_type, tw_window_type, seed, n_folds, run_n, verbose
        Stored as attributes; not otherwise used in this constructor.
    print_params : bool
        When true, ``print(self)`` is called once all attributes are set.
    """
    # Keep the explicit two-argument form: a zero-argument super() would
    # add a ``__class__`` entry to locals(), which is copied onto the
    # instance below.
    super(AudioDataset, self).__init__()

    converter = space

    # signal is 1D
    if feature == "signal":
        space = "vector"
        converter = "signal"

    # inverting a spectrogram if the space is a vector doesn't make sense
    if space == "vector" and feature == "inv_spectrogram":
        # This has to be an assignment; a bare comparison here would
        # silently leave the feature unchanged.
        feature = "spectrogram"

    feature_extractors = {
        "spectrogram": self.get_spectrogram_data,
        "inv_spectrogram": self.get_inv_spectrogram_data,
        "texture_window": self.get_texture_window_data,
        "signal": self.get_signal_data
    }

    spaces_converters = {
        "conv2d": AudioDataset.twod_to_conv2dspaces,
        "vector": AudioDataset.twod_to_vectorspaces,
        "signal": lambda x: x
    }

    index_converters = {
        "conv2d": lambda x: x,
        "vector": self.track_ids_to_frame_ids,
        "signal": lambda x: x
    }

    path = string_utils.preprocess(path)

    # init dynamic params
    tracks, genres = self.tracks_and_genres(path, seconds)
    # Sample rate and length are taken from the first track
    # (presumably all tracks share them -- TODO confirm).
    samplerate = tracks[0].samplerate
    seconds = tracks[0].seconds

    # Only the conv2d space needs a view converter; it is filled in below
    # once the 2D dimensions are known.
    view_converters = {
        "conv2d": None,
        "vector": None,
        "signal": None
    }

    # A raw signal has no window/bin layout, so the 2D dimensions are
    # computed only for the spectrogram-based features.
    if feature != "signal":
        # NOTE(review): the argument order used here is (sample count,
        # window size, step size) -- confirm against Spectrogram.wins.
        spec_wins_per_track = len(
            Spectrogram.wins(
                seconds * samplerate,
                window_size,
                step_size
            )
        )

        if feature == "texture_window":
            tw_wins_per_track = len(
                TextureWindow.wins(
                    spec_wins_per_track,
                    tw_window_size,
                    tw_step_size
                )
            )
            wins_per_track = tw_wins_per_track
        else:
            wins_per_track = spec_wins_per_track

        bins_per_track = Spectrogram.bins(fft_resolution)

        view_converters["conv2d"] = dense_design_matrix.DefaultViewConverter(
            (bins_per_track, wins_per_track, 1),
            axes
        )

    view_converter = view_converters[converter]

    # Expose every argument and derived local as an instance attribute,
    # then drop the self-reference that locals() carries along.
    self.__dict__.update(locals())
    del self.self

    if print_params:
        print(self)