Example 1
import os

from tables import *

import config
import utils
from spectrogram import Spectrogram

# Module-level configuration, read once at import time so the Track
# description below can size its spectrogram column.
conf = config.get_config()

# Spectrogram analysis parameters.
winsize = int(conf.get('Spectrogram', 'WindowSize'))
stepsize = int(conf.get('Spectrogram', 'StepSize'))
fftres = int(conf.get('Spectrogram', 'FFTResolution'))

# Total number of samples per track: seconds of audio times sample rate.
nframes = (int(conf.get('Tracks', 'LengthInSeconds'))
           * int(conf.get('Tracks', 'SampleRate')))

# Dataset layout on disk: one sub-directory of the audio folder per genre.
audio_folder = os.path.expanduser(conf.get('Input', 'AudioFolder'))
numberofgenres = len(utils.list_subdirs(audio_folder))

# Dimensions of the stored spectrogram array (windows x frequency bins).
wins = Spectrogram.wins(winsize, nframes, stepsize)
bins = Spectrogram.bins(fftres)
shape = Spectrogram.shape(wins, bins)


class Track(IsDescription):

    """Description of a track in HDF5"""
    # Numeric identifier of the track.
    idnumber = Int32Col()
    # Human-readable track name (max 64 bytes).
    name = StringCol(64)
    # Filesystem path of the source audio file (max 512 bytes).
    path = StringCol(512)
    # Genre label stored as text (max 32 bytes).
    genre = StringCol(32)
    # target = BoolCol(shape=(numberofgenres,))
    # Genre encoded as a single small integer index; the commented-out
    # line above shows an earlier one-hot (bool vector) encoding.
    target = Int8Col()
    # Precomputed spectrogram; `shape` is derived from the module-level
    # config (window/step size, FFT resolution, track length) above.
    spectrogram = Float32Col(dflt=0.0, shape=shape)
Example 2
    def __init__(self, path, which_set,
                 feature="spectrogram",
                 space="conv2d",
                 axes=('b', 0, 1, 'c'),
                 preprocess=False,
                 seconds=None,
                 window_size=None,
                 window_type=None,
                 step_size=None,
                 tw_window_size=None,
                 tw_window_type=None,
                 tw_step_size=None,
                 fft_resolution=None,
                 seed=None,
                 n_folds=4,
                 run_n=0,
                 verbose=False,
                 print_params=True):
        """Build a dataset view over the audio tracks found under *path*.

        Parameters
        ----------
        path : str
            Dataset root; run through ``string_utils.preprocess`` first
            (so environment-style markers expand).
        which_set : str
            Split selector (stored on ``self`` via ``locals()`` below and
            consumed by the extractor methods).
        feature : str
            One of ``"spectrogram"``, ``"inv_spectrogram"``,
            ``"texture_window"`` or ``"signal"``; picks the extractor
            from ``feature_extractors``.
        space : str
            ``"conv2d"`` or ``"vector"``; forced to ``"vector"`` when
            ``feature == "signal"`` (the raw signal is 1D).
        axes : tuple
            Axis ordering handed to ``DefaultViewConverter``.
        seconds, window_size, step_size, fft_resolution, tw_* :
            Analysis parameters; ``seconds`` defaults to the length of
            the first track found.
        n_folds, run_n, seed :
            Cross-validation layout parameters.
        print_params : bool
            When true, print ``self`` once construction is complete.

        Note
        ----
        Every local variable of this constructor is promoted to an
        instance attribute via ``self.__dict__.update(locals())``.
        """
        super(AudioDataset, self).__init__()

        converter = space

        # The raw signal is 1D: it can only live in a vector space and
        # uses its own pass-through converters.
        if feature == "signal":
            space = "vector"
            converter = "signal"

        # Inverting a spectrogram if the space is a vector doesn't make
        # sense, so fall back to the plain spectrogram.
        # BUG FIX: the original used `==` (a no-op comparison) instead of
        # `=`, so the fallback never actually happened.
        if space == "vector" and feature == "inv_spectrogram":
            feature = "spectrogram"

        feature_extractors = {
            "spectrogram": self.get_spectrogram_data,
            "inv_spectrogram": self.get_inv_spectrogram_data,
            "texture_window": self.get_texture_window_data,
            "signal": self.get_signal_data
        }

        spaces_converters = {
            "conv2d": AudioDataset.twod_to_conv2dspaces,
            "vector": AudioDataset.twod_to_vectorspaces,
            "signal": lambda x: x
        }

        index_converters = {
            "conv2d": lambda x: x,
            "vector": self.track_ids_to_frame_ids,
            "signal": lambda x: x
        }

        path = string_utils.preprocess(path)

        # Init dynamic params from the actual audio on disk; the first
        # track defines the common sample rate and length.
        tracks, genres = self.tracks_and_genres(path, seconds)
        samplerate = tracks[0].samplerate
        seconds = tracks[0].seconds

        if feature != "signal":
            # NOTE(review): argument order here is
            # (n_samples, window_size, step_size), while the module-level
            # call elsewhere uses (window_size, n_samples, step_size) —
            # confirm against the Spectrogram.wins signature.
            spec_wins_per_track = len(
                Spectrogram.wins(
                    seconds * samplerate,
                    window_size,
                    step_size
                )
            )

            if feature == "texture_window":
                # Texture windows are computed over spectrogram windows,
                # so their count derives from spec_wins_per_track.
                tw_wins_per_track = len(
                    TextureWindow.wins(
                        spec_wins_per_track,
                        tw_window_size,
                        tw_step_size
                    )
                )
                wins_per_track = tw_wins_per_track
            else:
                wins_per_track = spec_wins_per_track

            bins_per_track = Spectrogram.bins(fft_resolution)

        # Only the conv2d space needs a real view converter, and the
        # conv2d path is only reachable when feature != "signal", so
        # wins_per_track / bins_per_track are guaranteed to be bound in
        # that case.  BUG FIX: the original built the DefaultViewConverter
        # unconditionally, which raised NameError whenever
        # feature == "signal" (the size locals were never assigned).
        view_converters = {
            "conv2d": (
                dense_design_matrix.DefaultViewConverter(
                    (bins_per_track, wins_per_track, 1), axes
                ) if converter == "conv2d" else None
            ),
            "vector": None,
            "signal": None
        }

        view_converter = view_converters[converter]

        # Promote every constructor local to an instance attribute, then
        # drop the self-reference that locals() drags in.
        self.__dict__.update(locals())
        del self.self

        if print_params:
            print(self)