def __init__(self, dataset_dir='../../datasets'):
    """
    Set up the Nottingham midi dataset and load its three splits.

    The parent constructor is called with the archive filename and source
    url; afterwards the ``train``, ``valid`` and ``test`` sub-directories
    under ``self.dataset_location/Nottingham`` are globbed for ``*.mid``
    files, each file is read with ``midiread`` and the resulting piano
    rolls are concatenated and handed to ``dataset_shared``.

    Parameters
    ----------
    dataset_dir : str
        Forwarded unchanged as the `dataset_dir` argument of the parent
        constructor.
    """
    super(Nottingham, self).__init__(
        filename='Nottingham.zip',
        source='http://www-etud.iro.umontreal.ca/~boulanni/Nottingham.zip',
        dataset_dir=dataset_dir
    )

    def _glob_pattern(split):
        # self.dataset_location is expected to be available once the parent
        # constructor has run -- it is not assigned anywhere in this method.
        return os.path.join(self.dataset_location, 'Nottingham', split, '*.mid')

    def _piano_rolls(midi_paths):
        # One piano-roll array per midi file, cast to theano's floatX.
        # NOTE(review): r=(21, 109) and dt=0.3 are presumably the pitch range
        # and time step expected by midiread -- confirm against its definition.
        return [
            midiread(midi_path, r=(21, 109), dt=0.3).piano_roll.astype(theano.config.floatX)
            for midi_path in midi_paths
        ]

    # Build every pattern first, then glob, then read (train, valid, test).
    train_pattern, valid_pattern, test_pattern = [
        _glob_pattern(split) for split in ('train', 'valid', 'test')
    ]

    train_paths = glob.glob(train_pattern)
    valid_paths = glob.glob(valid_pattern)
    test_paths = glob.glob(test_pattern)

    train_rolls = _piano_rolls(train_paths)
    valid_rolls = _piano_rolls(valid_paths)
    test_rolls = _piano_rolls(test_paths)

    # Keep the per-file shapes, since the arrays are concatenated below.
    self.train_shapes = [roll.shape for roll in train_rolls]
    self.valid_shapes = [roll.shape for roll in valid_rolls]
    self.test_shapes = [roll.shape for roll in test_rolls]

    log.debug('Putting Nottingham into theano shared variables')
    self.train = dataset_shared(numpy.concatenate(train_rolls), name='nottingham_train', borrow=True)
    self.valid = dataset_shared(numpy.concatenate(valid_rolls), name='nottingham_valid', borrow=True)
    self.test = dataset_shared(numpy.concatenate(test_rolls), name='nottingham_test', borrow=True)
def __init__(self,
             path='datasets/Nottingham',
             source='http://www-etud.iro.umontreal.ca/~boulanni/Nottingham.zip',
             train_filter='.*train.*',
             valid_filter='.*valid.*',
             test_filter='.*test.*',
             ):
    """
    Set up the Nottingham midi dataset and load its three splits.

    All arguments are forwarded to the parent constructor. The filters are
    then used with ``find_files(self.path, ...)`` to pick the midi files of
    each split; every file is read with ``midiread`` and the piano rolls of
    a split are concatenated into the matching ``*_inputs`` attribute. The
    ``*_targets`` attributes are set to None.

    Parameters
    ----------
    path : str
        Forwarded as `path` to the parent constructor.
    source : str
        Forwarded as `source` to the parent constructor.
    train_filter : str
        Filter passed to ``find_files`` to select the training files.
    valid_filter : str
        Filter passed to ``find_files`` to select the validation files.
    test_filter : str
        Filter passed to ``find_files`` to select the test files.
    """
    super(Nottingham, self).__init__(
        path=path,
        source=source,
        train_filter=train_filter,
        valid_filter=valid_filter,
        test_filter=test_filter
    )

    def _piano_rolls(file_filter):
        # One piano-roll array per file that find_files yields for this filter.
        # NOTE(review): r=(21, 109) and dt=0.3 are presumably the pitch range
        # and time step expected by midiread -- confirm against its definition.
        return [
            midiread(midi_path, r=(21, 109), dt=0.3).piano_roll.astype(config.floatX)
            for midi_path in find_files(self.path, file_filter)
        ]

    # Read all three splits before concatenating anything.
    train_rolls = _piano_rolls(train_filter)
    valid_rolls = _piano_rolls(valid_filter)
    test_rolls = _piano_rolls(test_filter)

    self.train_inputs = numpy.concatenate(train_rolls)
    self.train_targets = None

    self.valid_inputs = numpy.concatenate(valid_rolls)
    self.valid_targets = None

    self.test_inputs = numpy.concatenate(test_rolls)
    self.test_targets = None
def __init__(self,
             path='datasets/MuseData',
             source='http://www-etud.iro.umontreal.ca/~boulanni/MuseData.zip',
             train_filter='.*train.*',
             valid_filter='.*valid.*',
             test_filter='.*test.*',
             ):
    """
    Set up the MuseData midi dataset and load its three splits.

    All arguments are forwarded to the parent constructor. The filters are
    then used with ``find_files(self.path, ...)`` to pick the midi files of
    each split; every file is read with ``midiread`` and the piano rolls of
    a split are concatenated into the matching ``*_inputs`` attribute. The
    ``*_targets`` attributes are set to None.

    Parameters
    ----------
    path : str
        Forwarded as `path` to the parent constructor.
    source : str
        Forwarded as `source` to the parent constructor.
    train_filter : str
        Filter passed to ``find_files`` to select the training files.
    valid_filter : str
        Filter passed to ``find_files`` to select the validation files.
    test_filter : str
        Filter passed to ``find_files`` to select the test files.
    """
    super(MuseData, self).__init__(
        path=path,
        source=source,
        train_filter=train_filter,
        valid_filter=valid_filter,
        test_filter=test_filter
    )

    def _piano_rolls(file_filter):
        # One piano-roll array per file that find_files yields for this filter.
        # NOTE(review): r=(21, 109) and dt=0.3 are presumably the pitch range
        # and time step expected by midiread -- confirm against its definition.
        return [
            midiread(midi_path, r=(21, 109), dt=0.3).piano_roll.astype(theano.config.floatX)
            for midi_path in find_files(self.path, file_filter)
        ]

    # Read all three splits before concatenating anything.
    train_rolls = _piano_rolls(train_filter)
    valid_rolls = _piano_rolls(valid_filter)
    test_rolls = _piano_rolls(test_filter)

    self.train_inputs = numpy.concatenate(train_rolls)
    self.train_targets = None

    self.valid_inputs = numpy.concatenate(valid_rolls)
    self.valid_targets = None

    self.test_inputs = numpy.concatenate(test_rolls)
    self.test_targets = None
def __init__(self, dataset_dir='../../datasets'):
    """
    Set up the MuseData midi dataset and load its three splits.

    The parent constructor is called with the archive filename and source
    url; afterwards the ``train``, ``valid`` and ``test`` sub-directories
    under ``self.dataset_location/MuseData`` are globbed for ``*.mid``
    files, each file is read with ``midiread`` and the resulting piano
    rolls are concatenated and handed to ``dataset_shared``.

    Parameters
    ----------
    dataset_dir : str
        Forwarded unchanged as the `dataset_dir` argument of the parent
        constructor.
    """
    log.debug("Loading MuseData midi dataset...")

    super(MuseData, self).__init__(
        filename='MuseData.zip',
        source='http://www-etud.iro.umontreal.ca/~boulanni/MuseData.zip',
        dataset_dir=dataset_dir
    )

    def _glob_pattern(split):
        # self.dataset_location is expected to be available once the parent
        # constructor has run -- it is not assigned anywhere in this method.
        return os.path.join(self.dataset_location, 'MuseData', split, '*.mid')

    def _piano_rolls(midi_paths):
        # One piano-roll array per midi file, cast to theano's floatX.
        # NOTE(review): r=(21, 109) and dt=0.3 are presumably the pitch range
        # and time step expected by midiread -- confirm against its definition.
        return [
            midiread(midi_path, r=(21, 109), dt=0.3).piano_roll.astype(theano.config.floatX)
            for midi_path in midi_paths
        ]

    # Build every pattern first, then glob, then read (train, valid, test).
    train_pattern, valid_pattern, test_pattern = [
        _glob_pattern(split) for split in ('train', 'valid', 'test')
    ]

    train_paths = glob.glob(train_pattern)
    valid_paths = glob.glob(valid_pattern)
    test_paths = glob.glob(test_pattern)

    train_rolls = _piano_rolls(train_paths)
    valid_rolls = _piano_rolls(valid_paths)
    test_rolls = _piano_rolls(test_paths)

    # Keep the per-file shapes, since the arrays are concatenated below.
    self.train_shapes = [roll.shape for roll in train_rolls]
    self.valid_shapes = [roll.shape for roll in valid_rolls]
    self.test_shapes = [roll.shape for roll in test_rolls]

    log.debug('Putting MuseData into theano shared variables')
    self.train = dataset_shared(numpy.concatenate(train_rolls), name='muse_train', borrow=True)
    self.valid = dataset_shared(numpy.concatenate(valid_rolls), name='muse_valid', borrow=True)
    self.test = dataset_shared(numpy.concatenate(test_rolls), name='muse_test', borrow=True)