def outer(self):
    if (not self.rnn_friendly and self._requires_reshape and
            (not isinstance(get_target_space(self), SequenceSpace) and
             not isinstance(get_target_space(self), SequenceDataSpace))):
        if isinstance(self.mlp.input_space, SequenceSpace):
            return SequenceSpace(get_target_space(self))
        elif isinstance(self.mlp.input_space, SequenceDataSpace):
            return SequenceDataSpace(get_target_space(self))
    else:
        return get_target_space(self)
def set_input_space(self, space):
    if ((not isinstance(space, SequenceSpace) and
         not isinstance(space, SequenceDataSpace)) or
            not isinstance(space.space, VectorSpace)):
        raise ValueError("Recurrent layer needs a SequenceSpace(VectorSpace) "
                         "or SequenceDataSpace(VectorSpace) as input "
                         "but received %s instead" % (space))

    self.input_space = space

    if self.indices is not None:
        if len(self.indices) > 1:
            raise ValueError("Only indices = [-1] is supported right now")
            self.output_space = CompositeSpace(
                [VectorSpace(dim=self.dim) for _
                 in range(len(self.indices))]
            )
        else:
            assert self.indices == [-1], "Only indices = [-1] works now"
            self.output_space = VectorSpace(dim=self.dim)
    else:
        if isinstance(self.input_space, SequenceSpace):
            self.output_space = SequenceSpace(VectorSpace(dim=self.dim))
        elif isinstance(self.input_space, SequenceDataSpace):
            self.output_space = SequenceDataSpace(VectorSpace(dim=self.dim))

    # Initialize the parameters
    rng = self.mlp.rng
    if self.irange is None:
        raise ValueError("Recurrent layer requires an irange value in "
                         "order to initialize its weight matrices")

    input_dim = self.input_space.dim

    # W is the input-to-hidden matrix
    W = rng.uniform(-self.irange, self.irange, (input_dim, self.dim * 4))

    # U is the hidden-to-hidden transition matrix
    U = np.zeros((self.dim, self.dim * 4))
    for i in xrange(4):
        u = rng.randn(self.dim, self.dim)
        U[:, i * self.dim:(i + 1) * self.dim], _ = scipy.linalg.qr(u)

    # b is the bias
    b = np.zeros((self.dim * 4,))

    self._params = [
        sharedX(W, name=(self.layer_name + '_W')),
        sharedX(U, name=(self.layer_name + '_U')),
        sharedX(b + self.init_bias, name=(self.layer_name + '_b'))
    ]
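# A minimal, self-contained sketch of the block-orthogonal initialization used
# above for the hidden-to-hidden matrix U: each of the four gate blocks is set
# to the Q factor of a QR decomposition of a random Gaussian matrix. The
# function name and defaults below are illustrative, not part of the module.
import numpy as np
import scipy.linalg


def orthogonal_lstm_transition(rng, dim, n_gates=4):
    """Return a (dim, dim * n_gates) matrix whose blocks are orthogonal."""
    U = np.zeros((dim, dim * n_gates))
    for i in range(n_gates):
        q, _ = scipy.linalg.qr(rng.randn(dim, dim))
        U[:, i * dim:(i + 1) * dim] = q
    return U


# Example: U = orthogonal_lstm_transition(np.random.RandomState(0), dim=128)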
def __init__(self, which_dataset, which_set='train', stop=None):
    """
    which_dataset : one of 'midi', 'nottingham', 'muse' or 'jsb'
    which_set : which split to load (default 'train')
    stop : optionally keep only the first `stop` sequences
    """
    self.load_data(which_dataset, which_set)

    if which_dataset == 'midi':
        max_label = 108
    elif which_dataset == 'nottingham':
        max_label = 93
    elif which_dataset == 'muse':
        max_label = 105
    elif which_dataset == 'jsb':
        max_label = 96
    else:
        raise ValueError("Unknown dataset: %s" % which_dataset)

    if stop is not None:
        assert stop <= len(self.raw_data)
        self.raw_data = self.raw_data[:stop]

    source = ('features', 'targets')
    space = CompositeSpace([
        SequenceDataSpace(VectorSpace(dim=max_label)),
        SequenceDataSpace(VectorSpace(dim=max_label))
    ])

    # Inputs are all but the last time step; targets are the same
    # sequences shifted forward by one time step.
    X = np.asarray([
        np.asarray([
            self.list_to_nparray(time_step, max_label)
            for time_step in np.asarray(self.raw_data[i][:-1])
        ])
        for i in xrange(len(self.raw_data))
    ])
    y = np.asarray([
        np.asarray([
            self.list_to_nparray(time_step, max_label)
            for time_step in np.asarray(self.raw_data[i][1:])
        ])
        for i in xrange(len(self.raw_data))
    ])

    super(MusicSequence, self).__init__(
        data=(X, y), data_specs=(space, source))
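# Hedged sketch of what list_to_nparray presumably does (it is referenced above
# but not shown here): turn the list of active MIDI pitches at one time step
# into a binary (multi-hot) vector of length max_label. The 1-based pitch
# offset is an assumption for illustration only.
import numpy as np


def list_to_nparray_sketch(active_pitches, max_label):
    """Multi-hot encode one time step of a piano-roll sequence."""
    v = np.zeros((max_label,), dtype='float32')
    for pitch in active_pitches:
        v[pitch - 1] = 1.0  # assumed 1-based pitch indexing
    return v


# Example: list_to_nparray_sketch([60, 64, 67], max_label=108) marks a C major triad.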
def __init__(self, which_set, data_mode, context_len=None, shuffle=True):
    self._load_data(which_set, context_len, data_mode)

    source = ('features', 'targets')
    space = CompositeSpace([
        SequenceDataSpace(IndexSpace(dim=1, max_labels=self._max_labels)),
        SequenceDataSpace(IndexSpace(dim=1, max_labels=self._max_labels))
    ])

    if context_len is None:
        context_len = len(self._raw_data) - 1

    X = np.asarray([
        self._raw_data[:-1][i * context_len:(i + 1) * context_len,
                            np.newaxis]
        for i in range(
            int(np.ceil((len(self._raw_data) - 1) / float(context_len))))
    ])
    y = np.asarray([
        self._raw_data[1:][i * context_len:(i + 1) * context_len,
                           np.newaxis]
        for i in range(
            int(np.ceil((len(self._raw_data) - 1) / float(context_len))))
    ])

    super(PennTreebankSequences, self).__init__(
        data=(X, y), data_specs=(space, source))
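# Sketch of the slicing above on a toy token stream, assuming _raw_data is a
# 1-D array of word indices: inputs are tokens [:-1], targets are tokens [1:],
# both cut into windows of context_len with a trailing axis of size 1 to match
# SequenceDataSpace(IndexSpace(dim=1, ...)). The variable names are illustrative.
import numpy as np

raw = np.arange(10)          # stand-in for self._raw_data
context_len = 4
n_windows = int(np.ceil((len(raw) - 1) / float(context_len)))

X = [raw[:-1][i * context_len:(i + 1) * context_len, np.newaxis]
     for i in range(n_windows)]
y = [raw[1:][i * context_len:(i + 1) * context_len, np.newaxis]
     for i in range(n_windows)]

print([x.shape for x in X])  # [(4, 1), (4, 1), (1, 1)] -> last window is shorter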
    for _features, _speaker_id, d in all_features_and_durs:
        for f in _features:
            feature_name = f[0]
            feature_dict.setdefault(feature_name, len(feature_dict))

    if args.write_features_filename:
        log.info('.. Writing features to %s', args.write_features_filename)
        durmodel_utils.write_features(
            args.write_features_filename, feature_dict)

    X, y = create_raw_matrices(
        feature_dict, all_features_and_durs, nonsilence_phonemes)

    source = ('features', 'targets')
    space = CompositeSpace([
        SequenceDataSpace(VectorSpace(dim=len(feature_dict))),
        SequenceDataSpace(VectorSpace(dim=2))
    ])

    from durmodel_elements import DurationSequencesDataset
    dataset = DurationSequencesDataset(
        data=(X, y), data_specs=(space, source))
else:  # the matching `if` of this branch lies above this excerpt
    full_features_and_durs = reduce(
        lambda prev, curr: prev.extend(curr) or prev, gen_fearures)

    if args.read_features_filename:
        log.info('.. Reading features from %s', args.read_features_filename)
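# Hedged sketch of the (X, y) layout the CompositeSpace above implies: one
# variable-length sequence per utterance, with feature vectors of
# len(feature_dict) dimensions and 2-dimensional duration targets per frame.
# The shapes and toy arrays below are assumptions for illustration, not the
# actual output of create_raw_matrices.
import numpy as np

n_feats = 50                                  # stand-in for len(feature_dict)
X_example = [np.zeros((seq_len, n_feats), dtype='float32')
             for seq_len in (12, 7, 30)]      # one array per sequence
y_example = [np.zeros((seq_len, 2), dtype='float32')
             for seq_len in (12, 7, 30)]      # matching per-frame targets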
def __init__(self,
             path,
             name='',               # optional name
             # selectors
             subjects='all',        # optional selector (list) or 'all'
             trial_types='all',     # optional selector (list) or 'all'
             trial_numbers='all',   # optional selector (list) or 'all'
             conditions='all',      # optional selector (list) or 'all'
             partitioner=None,
             channel_filter=NoChannelFilter(),  # optional channel filter, default: keep all
             channel_names=None,    # optional channel names (for metadata)
             label_map=None,        # optional conversion of labels
             remove_dc_offset=False,  # optional subtraction of channel mean, usually already done earlier
             resample=None,         # optional down-sampling
             # optional sub-sequence selection
             start_sample=0,
             stop_sample=None,      # optional for selection of sub-sequences
             # optional signal filter to be applied before splitting the signal
             signal_filter=None,
             # windowing parameters
             frame_size=-1,
             hop_size=-1,           # values > 0 will lead to windowing
             hop_fraction=None,     # alternative to specifying absolute hop_size
             # # optional spectrum parameters, n_fft = 0 keeps raw data
             # n_fft = 0,
             # n_freq_bins = None,
             # spectrum_log_amplitude = False,
             # spectrum_normalization_mode = None,
             # include_phase = False,
             flatten_channels=False,
             # layout='tf',  # (0,1)-axes layout tf=time x features or ft=features x time
             # save_matrix_path = None,
             keep_metadata=False,
             target_mode='label',
             ):
    '''
    Constructor
    '''
    # save params
    self.params = locals().copy()
    del self.params['self']
    # print self.params

    # TODO: get the whole filtering into an extra class
    datafiles_metadata, metadb = load_datafiles_metadata(path)
    # print datafiles_metadata

    def apply_filters(filters, node):
        if isinstance(node, dict):
            filtered = []
            keepkeys = filters[0]
            for key, value in node.items():
                if keepkeys == 'all' or key in keepkeys:
                    filtered.extend(apply_filters(filters[1:], value))
            return filtered
        else:
            return node  # [node]

    # keep only files that match the metadata filters
    self.datafiles = apply_filters(
        [subjects, trial_types, trial_numbers, conditions],
        datafiles_metadata)

    # copy metadata for retained files
    self.metadb = {}
    for datafile in self.datafiles:
        self.metadb[datafile] = metadb[datafile]

    # print self.datafiles
    # print self.metadb

    self.name = name

    if partitioner is not None:
        self.datafiles = partitioner.get_partition(self.name, self.metadb)

    # self.include_phase = include_phase
    # self.spectrum_normalization_mode = spectrum_normalization_mode
    # self.spectrum_log_amplitude = spectrum_log_amplitude

    self.sequence_partitions = []  # used to keep track of original sequences

    # metadata: [subject, trial_no, stimulus, channel, start, ]
    self.metadata = []

    sequences = []
    labels = []
    targets = []
    n_sequences = 0

    print(hop_size)
    if frame_size > 0 and hop_size == -1 and hop_fraction is not None:
        hop_size = np.ceil(frame_size / hop_fraction)
    print(hop_size)

    if target_mode == 'next':
        # get 1 more value per frame as target
        frame_size += 1
        # print 'frame size: {}'.format(frame_size)

    for i in xrange(len(self.datafiles)):
        with log_timing(log, 'loading data from {}'.format(self.datafiles[i])):

            # save start of next sequence
            self.sequence_partitions.append(n_sequences)

            data, metadata = load(os.path.join(path, self.datafiles[i]))
            # data, metadata = self.generate_test_data()

            label = metadata['label']
            if label_map is not None:
                label = label_map[label]

            multi_channel_frames = []
            multi_channel_targets = []

            # process 1 channel at a time
            for channel in xrange(data.shape[1]):
                # filter channels
                if not channel_filter.keep_channel(channel):
                    continue

                samples = data[:, channel]
                # print samples

                # subtract channel mean
                # FIXME
                if remove_dc_offset:
                    samples -= samples.mean()

                # down-sample if requested
                if resample is not None and resample[0] != resample[1]:
                    samples = librosa.resample(samples, resample[0], resample[1])

                # apply optional signal filter after down-sampling -> requires lower order
                if signal_filter is not None:
                    samples = signal_filter.process(samples)

                # get sub-sequence in resampled space
                # log.info('using samples {}..{} of {}'.format(start_sample, stop_sample, samples.shape))
                samples = samples[start_sample:stop_sample]
                # print start_sample, stop_sample, samples.shape

                # if n_fft is not None and n_fft > 0:  # Optionally:
                #     ### frequency spectrum branch ###
                #
                #     # transform to spectrogram
                #     hop_length = n_fft / 4
                #
                #     '''
                #     from http://theremin.ucsd.edu/~bmcfee/librosadoc/librosa.html
                #     >>> # Get a power spectrogram from a waveform y
                #     >>> S = np.abs(librosa.stft(y)) ** 2
                #     >>> log_S = librosa.logamplitude(S)
                #     '''
                #
                #     S = librosa.core.stft(samples, n_fft=n_fft, hop_length=hop_length)
                #     # mag = np.abs(S)      # magnitude spectrum
                #     mag = np.abs(S)**2     # power spectrum
                #
                #     # include phase information if requested
                #     if self.include_phase:
                #         # phase = np.unwrap(np.angle(S))
                #         phase = np.angle(S)
                #
                #     # Optionally: cut off high bands
                #     if n_freq_bins is not None:
                #         mag = mag[0:n_freq_bins, :]
                #         if self.include_phase:
                #             phase = phase[0:n_freq_bins, :]
                #
                #     if self.spectrum_log_amplitude:
                #         mag = librosa.logamplitude(mag)
                #
                #     s = mag  # for normalization
                #
                #     '''
                #     NOTE on normalization:
                #     It depends on the structure of a neural network and (even more)
                #     on the properties of data. There is no best normalization algorithm
                #     because if there would be one, it would be used everywhere by default...
                #
                #     In theory, there is no requirement for the data to be normalized at all.
                #     This is a purely practical thing because in practice convergence could
                #     take forever if your input is spread out too much. The simplest would be
                #     to just normalize it by scaling your data to (-1,1) (or (0,1) depending
                #     on activation function), and in most cases it does work. If your
                #     algorithm converges well, then this is your answer. If not, there are
                #     too many possible problems and methods to outline here without knowing
                #     the actual data.
                #     '''
                #
                #     ## normalize to mean 0, std 1
                #     if self.spectrum_normalization_mode == 'mean0_std1':
                #         # s = preprocessing.scale(s, axis=0);
                #         mean = np.mean(s)
                #         std = np.std(s)
                #         s = (s - mean) / std
                #
                #     ## normalize by linear transform to [0,1]
                #     elif self.spectrum_normalization_mode == 'linear_0_1':
                #         s = s / np.max(s)
                #
                #     ## normalize by linear transform to [-1,1]
                #     elif self.spectrum_normalization_mode == 'linear_-1_1':
                #         s = -1 + 2 * (s - np.min(s)) / (np.max(s) - np.min(s))
                #
                #     elif self.spectrum_normalization_mode is not None:
                #         raise ValueError(
                #             'unsupported spectrum normalization mode {}'.format(
                #                 self.spectrum_normalization_mode)
                #         )
                #
                #     # print s.mean(axis=0)
                #     # print s.std(axis=0)
                #
                #     # include phase information if requested
                #     if self.include_phase:
                #         # normalize phase to [-1,1]
                #         phase = phase / np.pi
                #         s = np.vstack([s, phase])
                #
                #     # transpose to fit pylearn2 layout
                #     s = np.transpose(s)
                #     # print s.shape
                #
                #     ### end of frequency spectrum branch ###
                # else:

                ### raw waveform branch ###

                # normalize to max amplitude 1
                s = librosa.util.normalize(samples)

                # add 2nd data dimension
                # s = s.reshape(s.shape[0], 1)
                # print s.shape

                ### end of raw waveform branch ###

                s = np.asfarray(s, dtype='float32')

                if frame_size > 0 and hop_size > 0:
                    # print 'frame size: {}'.format(frame_size)
                    s = s.copy()  # FIXME: THIS IS NECESSARY - OTHERWISE, THE FOLLOWING OP DOES NOT WORK!!!!
                    frames = compute_frames(s, frame_length=frame_size,
                                            hop_length=hop_size)
                    # frames = librosa.util.frame(s, frame_length=frame_size, hop_length=hop_size)
                else:
                    frames = s
                del s
                # print frames.shape

                if target_mode == 'next':
                    frame_targets = np.empty(len(frames))
                    tmp = []
                    for f, frame in enumerate(frames):
                        tmp.append(frame[:-1])
                        frame_targets[f] = frame[-1]
                    frames = np.asarray(tmp)
                    # print frames.shape
                    # for f, frm in enumerate(frames):
                    #     print frm, frame_targets[f]
                    # # FIXME: OK so far

                if flatten_channels:
                    # add artificial channel dimension
                    frames = frames.reshape(
                        (frames.shape[0], frames.shape[1], frames.shape[2], 1))
                    # print frames.shape

                    sequences.append(frames)

                    # increment counter by new number of frames
                    n_sequences += frames.shape[0]

                    if keep_metadata:
                        # determine channel name
                        channel_name = None
                        if channel_names is not None:
                            channel_name = channel_names[channel]
                        elif 'channels' in metadata:
                            channel_name = metadata['channels'][channel]

                        self.metadata.append({
                            'subject': metadata['subject'],         # subject
                            'trial_type': metadata['trial_type'],   # trial_type
                            'trial_no': metadata['trial_no'],       # trial_no
                            'condition': metadata['condition'],     # condition
                            'channel': channel,                     # channel
                            'channel_name': channel_name,
                            'start': self.sequence_partitions[-1],  # start
                            'stop': n_sequences                     # stop
                        })

                    for _ in xrange(frames.shape[0]):
                        labels.append(label)

                    if target_mode == 'next':
                        for next in frame_targets:
                            targets.append(next)
                else:
                    multi_channel_frames.append(frames)
                    if target_mode == 'next':
                        multi_channel_targets.append(frame_targets)

                ### end of channel iteration ###

            # print np.asarray(multi_channel_frames, dtype=np.int)
            # # FIXME: OK so far

            if not flatten_channels:
                # turn list into array
                multi_channel_frames = np.asfarray(multi_channel_frames,
                                                   dtype='float32')
                # [channels x frames x time x freq] -> cb01
                # [channels x frames x time x 1] -> cb0.
                # move channel dimension to end
                multi_channel_frames = np.rollaxis(
                    multi_channel_frames, 0, len(multi_channel_frames.shape))
                # print multi_channel_frames.shape
                log.info(multi_channel_frames.shape)

                sequences.append(multi_channel_frames)

                # increment counter by new number of frames
                n_sequences += multi_channel_frames.shape[0]

                if keep_metadata:
                    self.metadata.append({
                        'subject': metadata['subject'],         # subject
                        'trial_type': metadata['trial_type'],   # trial_type
                        'trial_no': metadata['trial_no'],       # trial_no
                        'condition': metadata['condition'],     # condition
                        'channel': 'all',                       # channel
                        'start': self.sequence_partitions[-1],  # start
                        'stop': n_sequences                     # stop
                    })

                for _ in xrange(multi_channel_frames.shape[0]):
                    labels.append(label)

                if target_mode == 'next':
                    multi_channel_targets = np.asfarray(
                        multi_channel_targets, dtype='float32')
                    targets.append(multi_channel_targets.T)

        ### end of datafile iteration ###

    # print sequences[0].shape
    # print np.asarray(sequences[0], dtype=np.int)
    # # FIXME: looks OK

    # turn into numpy arrays
    sequences = np.vstack(sequences)
    # sequences = np.asarray(sequences).squeeze()
    # sequences = sequences.reshape(sequences.shape[0]*sequences.shape[1], sequences.shape[2])
    print('sequences: {}'.format(sequences.shape))

    labels = np.hstack(labels)
    self.labels = labels
    print('labels: {}'.format(labels.shape))

    if target_mode == 'label':
        targets = labels.copy()

        ## copy targets to fit SequenceDataSpace(VectorSpace) structure (*, frame_size, 12)
        # targets = targets.reshape((targets.shape[0], 1))
        # targets = np.repeat(targets, frame_size, axis=1)
        # print targets.shape
        # one_hot_formatter = OneHotFormatter(max(targets.max() + 1, len(label_map)), dtype=np.int)
        # one_hot_y = one_hot_formatter.format(targets)
        # print one_hot_y.shape

        ## copy targets to fit SequenceDataSpace(IndexSpace) structure -> (*, frame_size, 1)
        targets = targets.reshape((targets.shape[0], 1))
        targets = np.repeat(targets, frame_size, axis=1)
        targets = targets.reshape((targets.shape[0], targets.shape[1], 1))
        print(targets.shape)
    elif target_mode == 'next':
        targets = np.concatenate(targets)
        targets = targets.reshape((targets.shape[0], 1, targets.shape[1]))
    print('targets: {}'.format(targets.shape))

    n_channels = sequences.shape[2]
    print('number of channels: {}'.format(n_channels))

    # if layout == 'ft':  # swap axes to (batch, feature, time, channels)
    #     sequences = sequences.swapaxes(1, 2)

    log.debug('final dataset shape: {} (b,0,1,c)'.format(sequences.shape))

    source = ('features', 'targets')
    # space = CompositeSpace([
    #     # VectorSequenceSpace(dim=64),
    #     SequenceSpace(VectorSpace(dim=64)),
    #     VectorSpace(dim=12),
    # ])
    if target_mode == 'label':
        space = CompositeSpace([
            SequenceDataSpace(VectorSpace(dim=n_channels)),
            # SequenceDataSpace(VectorSpace(dim=12)),
            SequenceDataSpace(IndexSpace(dim=1, max_labels=12)),
            # SequenceDataSpace(IndexSpace(dim=512, max_labels=12)),
        ])
    elif target_mode == 'next':
        space = CompositeSpace([
            # does not work with VectorSpacesDataset
            # SequenceSpace(VectorSpace(dim=64)),
            # SequenceSpace(VectorSpace(dim=64))
            SequenceDataSpace(VectorSpace(dim=n_channels)),
            SequenceDataSpace(VectorSpace(dim=n_channels))
            # VectorSpace(dim=n_channels)
        ])
    # source = ('features')
    # space = SequenceSpace(VectorSpace(dim=64))

    print('sequences: {}'.format(sequences.shape))
    print('targets: {}'.format(targets.shape))

    # for i, seq in enumerate(sequences):
    #     print np.asarray(seq, dtype=np.int)
    #     print np.asarray(targets[i], dtype=np.int)
    #     break
    # # FIXME: looks OK

    # SequenceDataSpace(IndexSpace(dim=1, max_labels=self._max_labels)),

    if target_mode == 'label':
        super(MultiChannelEEGSequencesDataset, self).__init__(
            # data=(sequences, one_hot_y),  # works with vectorspace-target
            data=(sequences, targets),      # works with indexspace-target
            # data=sequences,
            data_specs=(space, source))
    elif target_mode == 'next':
        super(MultiChannelEEGSequencesDataset, self).__init__(
            # data=(sequences, one_hot_y),  # works with vectorspace-target
            data=(sequences, targets),      # works with indexspace-target
            # data=sequences,
            data_specs=(space, source))
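# Hedged usage sketch: constructing the dataset above with a handful of the
# keyword arguments from its signature. The path and the numeric values are
# placeholders chosen for illustration; only parameters that actually appear
# in the constructor are used.
dataset = MultiChannelEEGSequencesDataset(
    path='data/eeg',       # placeholder path to the datafile metadata
    name='train',
    subjects='all',
    resample=(512, 256),   # example values: down-sample 512 Hz -> 256 Hz
    frame_size=64,
    hop_size=32,
    keep_metadata=True,
    target_mode='label',
)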
class ChainDataset(Dataset):
    """Training data generator.

    Supports the PyLearn2 dataset interface.
    """
    num_states = 3
    trans_prob = numpy.array([[0.1, 0.5, 0.4],
                              [0.1, 0.9, 0.0],
                              [0.3, 0.3, 0.4]])
    values, vectors = numpy.linalg.eig(trans_prob.T)
    equilibrium = vectors[:, values.argmax()]
    equilibrium = equilibrium / equilibrium.sum()
    trans_entropy = trans_prob * numpy.log(trans_prob + 1e-6)
    entropy = equilibrium.dot(trans_entropy).sum()
    data_specs = (SequenceDataSpace(IndexSpace(max_labels=num_states, dim=1)),
                  'x')

    def __init__(self, rng, seq_len):
        update_instance(self, locals())

    def iterator(self, batch_size, data_specs,
                 return_tuple, mode, num_batches, rng=None):
        """Returns a PyLearn2 compatible iterator."""
        assert return_tuple

        dataset = self

        class Iterator(six.Iterator):
            # This is not true, but let PyLearn2 think that this
            # iterator is not stochastic.
            # Makes life easier for now.
            stochastic = False
            num_examples = num_batches * batch_size

            def __init__(self, **kwargs):
                self.batches_retrieved = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.batches_retrieved < num_batches:
                    self.batches_retrieved += 1
                    return (dataset._next_batch(batch_size)[..., None],)
                raise StopIteration()

        return Iterator()

    def get_num_examples(self):
        """Part of the PyLearn2 Dataset interface."""
        return float('inf')

    def _next_single(self):
        states = [0]
        while len(states) != self.seq_len:
            states.append(numpy.random.multinomial(
                1, self.trans_prob[states[-1]]).argmax())
        return states

    def _next_batch(self, batch_size):
        """Generate random sequences from the family."""
        x = numpy.zeros((self.seq_len, batch_size), dtype='int64')
        for i in range(batch_size):
            x[:, i] = self._next_single()
        return x
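# Minimal usage sketch for ChainDataset: draw one batch of Markov-chain
# sequences through the PyLearn2-style iterator defined above. The batch size,
# sequence length, and seed are arbitrary example values.
rng = numpy.random.RandomState(1)
chain = ChainDataset(rng, seq_len=100)
it = chain.iterator(batch_size=10, data_specs=chain.data_specs,
                    return_tuple=True, mode=None, num_batches=5)
batch, = next(it)
print(batch.shape)  # (seq_len, batch_size, 1) == (100, 10, 1)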