def get_features(path, split, standardize):
    """
    Load features from disk and return them as a 2-D design matrix.

    Parameters
    ----------
    path : str
        File to load. If it contains commas it is treated as a
        comma-separated list of paths; each one is loaded with the same
        `split` / `standardize` settings and the results are concatenated
        along axis 1. Paths ending in '.npy' are read with `np.load`,
        everything else with `serial.load`.
    split : bool
        If true, the matrix is replaced by `abs(X)` concatenated with
        `abs(-X)` along axis 1, which doubles the number of columns.
    standardize : bool
        Not usable at the moment: the function asserts False as soon as
        this flag is set (see the comment at the assertion).

    Returns
    -------
    X : ndarray
        The loaded data; arrays that are not 2-D are flattened with a
        DefaultViewConverter built from their trailing dimensions.
    """
    if ',' in path:
        # Several files: load each one recursively and stack the columns.
        pieces = [get_features(piece, split, standardize)
                  for piece in path.split(',')]
        return np.concatenate(pieces, axis=1)

    if path.endswith('.npy'):
        topo_view = np.load(path)
    else:
        topo_view = serial.load(path)

    if 'h5py' in str(type(topo_view)):
        # An h5py container is expected to hold exactly one entry; the
        # single-element unpacking fails loudly otherwise.
        (name,) = topo_view.keys()
        topo_view = topo_view[name].value.T

    if len(topo_view.shape) == 2:
        X = topo_view
    else:
        converter = DefaultViewConverter(topo_view.shape[1:])
        print('converting data')
        X = converter.topo_view_to_design_mat(topo_view)

    if split:
        # NOTE(review): abs(X) and abs(-X) are identical, so both halves
        # carry the same values -- presumably positive/negative parts
        # were intended; confirm before changing.
        magnitude_a = np.abs(X)
        magnitude_b = np.abs(-X)
        X = np.concatenate((magnitude_a, magnitude_b), axis=1)

    if standardize:
        assert False  # bug: if X is test set, we need to subtract train mean, divide by train std
        X -= X.mean(axis=0)
        X /= np.sqrt(.01 + np.var(X, axis=0))

    return X
def make_viewer(mat, grid_shape=None, patch_shape=None, activation=None,
                pad=None, is_color = False, rescale = True):
    """
    Given filters in rows, guesses dimensions of patches and nice
    dimensions for the PatchViewer and returns a PatchViewer containing
    visualizations of the filters.

    Parameters
    ----------
    mat : ndarray
        2-D matrix with one flattened patch per row.
    grid_shape : tuple, optional
        Grid layout passed to PatchViewer. Chosen with
        `PatchViewer.pick_shape(mat.shape[0])` when omitted.
    patch_shape : tuple, optional
        (rows, cols) of one patch. When omitted it is picked with
        `PatchViewer.pick_shape(..., exact=True)` from the number of
        pixels per channel.
    activation : sequence, optional
        Per-patch activation forwarded to `PatchViewer.add_patch`. If its
        first element is iterable, `activation` is treated as several
        sequences and the i-th entry of each is passed for patch i.
    pad : optional
        Forwarded to PatchViewer.
    is_color : bool
        If true, rows are interpreted as 3-channel patches, otherwise as
        single-channel patches.
    rescale : bool
        Forwarded to `PatchViewer.add_patch`.

    Returns
    -------
    rval : PatchViewer
        Viewer holding one patch per row of `mat`.
    """
    num_channels = 3 if is_color else 1

    if grid_shape is None:
        grid_shape = PatchViewer.pick_shape(mat.shape[0])

    if patch_shape is None:
        assert mat.shape[1] % num_channels == 0
        # Floor division keeps the pixel count an int even when true
        # division is in effect (Python 3 / `from __future__ import
        # division`); with plain `/` pick_shape would receive a float.
        patch_shape = PatchViewer.pick_shape(mat.shape[1] // num_channels,
                                             exact = True)
        assert patch_shape[0] * patch_shape[1] * num_channels == mat.shape[1]

    rval = PatchViewer(grid_shape, patch_shape, pad=pad, is_color = is_color)

    topo_shape = (patch_shape[0], patch_shape[1], num_channels)
    view_converter = DefaultViewConverter(topo_shape)
    topo_view = view_converter.design_mat_to_topo_view(mat)

    for i in xrange(mat.shape[0]):
        if activation is not None:
            if hasattr(activation[0], '__iter__'):
                # Several activation sequences: take entry i of each.
                act = [a[i] for a in activation]
            else:
                act = activation[i]
        else:
            act = None

        patch = topo_view[i, :]
        rval.add_patch(patch, rescale=rescale, activation=act)

    return rval
def plot(w):
    """
    Render the filters in `w` as one image: filters are grouped into
    blocks, blocks into one panel per plotted channel, panels into the
    final image.

    Relies on module-level state: `model` (block sizes), `opts`
    (height, width, chans, color, splitblocks), `nplots` and `get_dims`.

    Parameters
    ----------
    w : ndarray
        Filter matrix with one flattened filter per row; row
        `bidx * filters_per_block + fidx` is filter `fidx` of block `bidx`.

    Returns
    -------
    image : ndarray
        Shallow copy of the assembled main viewer image.

    Notes
    -----
    With `opts.splitblocks` set, each block is additionally written to
    'filters/filters_chan<chan>_block<block>.png'.
    """
    nblocks = int(model.n_g / model.sparse_gmask.bw_g)
    filters_per_block = model.sparse_gmask.bw_g * model.sparse_hmask.bw_h

    # Three nested viewers: filters -> block -> channel panel -> figure.
    block_viewer = PatchViewer((model.sparse_gmask.bw_g,
                                model.sparse_hmask.bw_h),
                               (opts.height, opts.width),
                               is_color = opts.color, pad=(2,2))
    chan_viewer = PatchViewer(get_dims(nblocks),
                              (block_viewer.image.shape[0],
                               block_viewer.image.shape[1]),
                              is_color = opts.color, pad=(5,5))
    main_viewer = PatchViewer(get_dims(nplots),
                              (chan_viewer.image.shape[0],
                               chan_viewer.image.shape[1]),
                              is_color = opts.color, pad=(10,10))

    topo_shape = [opts.height, opts.width, opts.chans]
    view_converter = DefaultViewConverter(topo_shape)

    # os.makedirs raises if the directory already exists, which used to
    # break every run after the first one.
    if opts.splitblocks and not os.path.isdir('filters/'):
        os.makedirs('filters/')

    for chan_i in xrange(nplots):
        # Colour mode keeps all channels; otherwise select channel chan_i.
        viewer_dims = slice(0, None) if opts.color else chan_i

        for bidx in xrange(nblocks):

            for fidx in xrange(filters_per_block):
                fi = bidx * filters_per_block + fidx
                topo_view = view_converter.design_mat_to_topo_view(w[fi:fi+1,:])
                try:
                    block_viewer.add_patch(topo_view[0,:,:,viewer_dims])
                except Exception:
                    # Debugging aid kept from the original code; narrowed
                    # from a bare `except` so Ctrl-C and SystemExit are no
                    # longer intercepted.
                    import pdb; pdb.set_trace()

            if opts.splitblocks:
                pl.imshow(block_viewer.image, interpolation='nearest')
                pl.axis('off')
                pl.title('Wv - block %i, chan %i' % (bidx, chan_i))
                # The format string names the channel first, then the
                # block; the arguments used to be passed the other way
                # round, mislabelling every file.
                pl.savefig('filters/filters_chan%i_block%i.png'
                           % (chan_i, bidx))

            chan_viewer.add_patch(block_viewer.image[:,:,viewer_dims] - 0.5)
            block_viewer.clear()

        main_viewer.add_patch(chan_viewer.image[:,:,viewer_dims] - 0.5)
        chan_viewer.clear()

    return copy.copy(main_viewer.image)
def set_topological_view(self, V, axes=('b', 0, 1, 'c')):
    """
    Make the dataset represent `V`, a batch of topological views.

    Builds a DefaultViewConverter from the row/column/channel sizes of
    `V`, stores the flattened data in `self.X` and refreshes
    `self.data_specs`, `self.X_space`, `self.X_topo_space` and
    `self._iter_data_specs`.

    Parameters
    ----------
    V : ndarray
        Batch of examples in topological layout; must not contain NaN.
    axes : tuple
        Axis order of `V`; must contain 0, 1 and 'c' (rows, columns,
        channels).

    Notes
    -----
    When `self.y` is set, `self.latent` is read as well, so it must be an
    array at that point.
    """
    assert not contains_nan(V)

    # rows, cols, channels -- looked up through the axis labels.
    topo_shape = [V.shape[axes.index(label)] for label in (0, 1, 'c')]
    self.view_converter = DefaultViewConverter(topo_shape, axes=axes)
    self.X = self.view_converter.topo_view_to_design_mat(V)

    # self.X_topo_space stores a "default" topological space that
    # will be used only when self.iterator is called without a
    # data_specs, and with "topo=True", which is deprecated.
    self.X_topo_space = self.view_converter.topo_space
    assert not contains_nan(self.X)

    # Update data specs
    feature_space = VectorSpace(dim=self.X.shape[1])
    feature_source = 'features'

    if self.y is None:
        self.data_specs = (feature_space, feature_source)
    else:
        target_dim = 1 if self.y.ndim == 1 else self.y.shape[-1]

        # `y_labels` wins; `max_labels` is only consulted to support old
        # pickled models.
        label_count = getattr(self, 'y_labels', None)
        if label_count is None:
            label_count = getattr(self, 'max_labels', None)

        if label_count is not None:
            target_space = IndexSpace(dim=target_dim,
                                      max_labels=label_count)
        else:
            target_space = VectorSpace(dim=target_dim)

        latent_space = VectorSpace(dim=self.latent.shape[-1])
        self.data_specs = (
            CompositeSpace((feature_space, target_space, latent_space)),
            (feature_source, 'targets', 'latents'),
        )

    self.X_space = feature_space
    self._iter_data_specs = (feature_space, feature_source)
def _transform_multi_channel_data(self, X, y):
    """
    Partition `X` / `y` into windows and convert them to a design matrix
    with one-hot targets.

    Parameters
    ----------
    X, y
        Forwarded to `self._partition_data` together with
        `self.window_size`.

    Returns
    -------
    design_X : ndarray
        Partitioned data flattened by the view converter.
    hot_y
        Output of `OneHotFormatter(max_labels=self.n_classes).format` on
        the per-window labels.
    view_converter : DefaultViewConverter
        Converter built from `self.sample_shape` with ('b', 0, 1, 'c')
        axes.
    """
    # Data partitioning
    windows_X, windows_y = self._partition_data(
        X=X, y=y, partition_size=self.window_size)

    # Swap the last two axes, then insert a singleton axis before the
    # last one so the array is 4-D like a ('b', 0, 1, 'c') batch.
    swapped = np.transpose(windows_X, [0, 2, 1])
    topo_shape = (swapped.shape[0], swapped.shape[1], 1, swapped.shape[2])
    topo_X = np.reshape(swapped, topo_shape)

    # Create view converter
    view_converter = DefaultViewConverter(shape=self.sample_shape,
                                          axes=('b', 0, 1, 'c'))

    # Convert data into a design matrix and check the round trip.
    design_X = view_converter.topo_view_to_design_mat(topo_X)
    round_trip = view_converter.design_mat_to_topo_view(design_X)
    assert np.all(topo_X == round_trip)

    # A window is labelled 1 when the labels inside it sum to more than
    # zero.
    window_labels = np.sum(windows_y, axis=1)
    window_labels[window_labels > 0] = 1

    formatter = OneHotFormatter(max_labels=self.n_classes)
    hot_y = formatter.format(window_labels)

    return design_X, hot_y, view_converter
def set_topological_view(self, topo_view, axes=('b', 0, 1, 'c')):
    '''
    Make the dataset represent `topo_view`, a batch of topological views
    in which the batch axis carries frames.

    Parameters
    ----------
    topo_view : ndarray
        Batch of examples in topological layout; must not contain NaN.
    axes : tuple
        Axis order of `topo_view`; must contain 'b', 0, 1 and 'c'.

    Notes
    -----
    Targets are not supported: the method asserts that `self.y` is None.
    The feature space dimension is frames * rows * cols * channels, i.e.
    it includes the frame (batch) axis.
    '''
    assert not np.any(np.isnan(topo_view))

    shape = topo_view.shape
    frames = shape[axes.index('b')]  # pretend frames come in as batch dim
    rows = shape[axes.index(0)]
    cols = shape[axes.index(1)]
    channels = shape[axes.index('c')]

    # The converter only knows about one frame: frames are left out.
    converter = DefaultViewConverter([rows, cols, channels], axes=axes)
    self.view_converter = converter
    self.X = converter.topo_view_to_design_mat(topo_view)

    # self.X_topo_space stores a "default" topological space that
    # will be used only when self.iterator is called without a
    # data_specs, and with "topo=True", which is deprecated.
    self.X_topo_space = converter.topo_space
    assert not np.any(np.isnan(self.X))

    # Update data specs
    feature_space = VectorSpace(dim = frames * rows * cols * channels)
    feature_source = 'features'
    assert self.y is None, 'y not supported now'

    self.data_specs = (feature_space, feature_source)
    self.X_space = feature_space
    self._iter_data_specs = (feature_space, feature_source)
def set_topological_view(self, V, axes=('b', 0, 1, 'c'), start=0):
    """
    Flatten the topological batch `V` and write it into the dataset's
    HDF5 file.

    Parameters
    ----------
    V : ndarray
        Batch of examples in topological layout; must not contain NaN.
    axes : tuple
        Axis order of `V`; must contain 0, 1 and 'c'.
    start : int
        Forwarded to `FaceBBoxDDMPytables.fill_hdf5` as `start`.

    Notes
    -----
    `self.view_converter` is replaced; the flattened data is handed to
    `fill_hdf5` and is not assigned to `self.X` here.
    """
    assert not numpy.any(numpy.isnan(V))

    topo_shape = [V.shape[axes.index(label)] for label in (0, 1, 'c')]
    self.view_converter = DefaultViewConverter(topo_shape, axes=axes)

    design_mat = self.view_converter.topo_view_to_design_mat(V)
    assert not numpy.any(numpy.isnan(design_mat))

    FaceBBoxDDMPytables.fill_hdf5(h5file = self.h5file,
                                  data_x = design_mat,
                                  start = start)
def find_adversary(model, X0, label, P0=None, mu=.1, epsilon=.25, maxits=10,
                   stop_thresh=0.5, griffin_lim=False):
    '''
    Solves:

        y* = argmin_y f(y; label)  s.t.  y >= 0 and ||y-X0|| < e

    where f(y) is the cost the network associates with the pair
    (y, label).

    This can be solved using the projected gradient method:

        min_y f(y)  s.t.  y >= 0 and ||y-X0|| < e

        z = max(0, y^k - mu.f'(y^k))
        y^k+1 = P(z)

        P(z) = min_u ||u-z||  s.t.  {u | ||u-X0|| < e }

        Lagrangian(u,l) = L(u,l) = ||u-z|| + nu*(||u-X0|| - e)
        dL/du = u-z + nu*(u-X0) = 0
        u = (1+nu)^-1 (z + nu*X0)

    KKT:

        ||u-x|| = e
        ||(1/(1+nu))(z + nu*x) - x|| = e
        ||(1/(1+nu))z + ((nu/(1+nu))-1)x|| = e
        ||(1/(1+nu))z - (1/(1+nu))x|| = e
        (1/(1+nu))||z-x|| = e
        nu = max(0,||z-x||/e - 1)

    Note that the gradient step actually taken below uses the sign of the
    gradient: Z = Y - mu * sign(grad).

    function inputs:

        model       - pylearn2 dnn model (implements fprop, cost)
        X0          - an example that the model classifies correctly;
                      2-D, frames along axis 0
        label       - an incorrect label (column index set to 1 in the
                      one-hot target)
        P0          - optional array truncated like X0 and passed through
                      `griffin_lim_proj` when `griffin_lim` is set
        mu          - step size
        epsilon     - bound used in the norm projection
        maxits      - number of iterations
        stop_thresh - currently unused: the early-stopping test (and the
                      FISTA momentum step) are disabled
        griffin_lim - if true, apply `griffin_lim_proj` after the
                      non-negative projection

    returns:

        (Y, P0) - the perturbed example and the (possibly updated) P0
    '''
    n_classes, n_examples = model.get_output_space().dim, X0.shape[0]
    nfft = 2 * (X0.shape[1] - 1)
    nhop = nfft // 2

    # Set-up gradient computation w/ Theano
    in_batch = model.get_input_space().make_theano_batch()
    out_batch = model.get_output_space().make_theano_batch()
    cost = model.cost(out_batch, model.fprop(in_batch))
    dCost = T.grad(cost * n_examples, in_batch)
    grad_theano = theano.function([in_batch, out_batch], dCost)

    input_space = model.get_input_space()
    if isinstance(input_space, Conv2DSpace):
        tframes, dim = input_space.shape
        view_converter = DefaultViewConverter((tframes, dim, 1))
    else:
        dim = input_space.dim
        tframes = 1
        view_converter = None

    nframes = X0.shape[0]
    thop = 1.
    # Start index of every window fed to the network. The builtin int()
    # replaces np.int, an alias that newer NumPy releases no longer
    # provide.
    sup = np.arange(0, nframes - tframes + 1, int(tframes / thop))

    if view_converter:
        def grad(batch, labels):
            # Flatten each window of tframes rows into one design row ...
            data = np.vstack([
                np.reshape(batch[i:i + tframes, :], (tframes * dim, ))
                for i in sup
            ])
            topo_view = grad_theano(
                view_converter.get_formatted_batch(data, input_space),
                labels)
            # ... and unfold the gradient back into frame-major layout.
            design_mat = view_converter.topo_view_to_design_mat(topo_view)
            return np.vstack(
                [np.reshape(r, (tframes, dim)) for r in design_mat])
    else:
        grad = grad_theano

    # convert integer label into one-hot vectors, one row per window
    one_hot = np.zeros((len(sup), n_classes), dtype=np.float32)
    one_hot[:, label] = 1

    # Drop trailing frames that do not fill a complete window.
    X0 = X0[:len(sup) * tframes, :]
    if P0 is not None:
        P0 = P0[:len(sup) * tframes, :]

    # projected gradient:
    Y = np.copy(X0)
    for _ in xrange(maxits):
        # gradient step
        g = grad(Y, one_hot)
        Z = Y - mu * np.sign(g)

        # non-negative projection
        Z = Z * (Z > 0)

        if griffin_lim:
            # `-nhop - 1` equals the former `-nfft / 2 - 1` (nfft is
            # even) but stays an int under true division, as a slice
            # bound must.
            Z, P0 = griffin_lim_proj(
                np.hstack((Z, Z[:, -2:-nhop - 1:-1])), P0, its=0)

        # maximum allowable signal-to-noise projection
        nu = np.linalg.norm(
            (Z - X0)) / n_examples / epsilon - 1  # lagrange multiplier
        nu = nu * (nu >= 0)
        Y = (Z + nu * X0) / (1 + nu)

    return Y, P0
def _execute(self):
    """
    Extract pooled features for one dataset split and save them to disk.

    Steps, all driven by attributes of `self`:

    1. Build the dataset selected by `dataset_family[which_set][size]`
       and keep only the examples with `y_fine < 2`.
    2. Load the stored preprocessing pipeline and replace its first item
       with a dense (stride 1) grid-patch extractor of `size` x `size`.
    3. Compile a Theano function mapping patches to features through
       `model.infer`; only `feature_type == 'exp_h,exp_g'` is currently
       supported, every other type raises NotImplementedError.
    4. For each full batch: extract patches, compute features, zero out
       NaNs (counting them), reassemble the per-patch features into a
       topological map and pool it (`pool_mode` 'mean' or 'max') into
       `count` x `count` regions for every entry of
       `pooling_region_counts`.
    5. Save one array per pooling count to `self.save_paths` and warn if
       any NaN was seen.

    Notes
    -----
    Only full batches are processed; rows of the outputs belonging to a
    trailing partial batch stay zero.
    `ns = 32 - size + 1` hard-codes a 32-pixel image side
    (NOTE(review): confirm against the datasets in use).

    Raises
    ------
    NotImplementedError
        For models with more than one `G_hat` layer and for unsupported
        feature types.
    ValueError
        For an unknown `pool_mode`.
    """
    batch_size = self.batch_size
    pooling_region_counts = self.pooling_region_counts
    dataset_family = self.dataset_family
    which_set = self.which_set
    model = self.model
    size = self.size

    nan = 0

    dataset_descriptor = dataset_family[which_set][size]

    dataset = dataset_descriptor.dataset_maker()
    expected_num_examples = dataset_descriptor.num_examples

    full_X = dataset.get_design_matrix()
    assert full_X.dtype == 'float32'
    num_examples = full_X.shape[0]
    assert num_examples == expected_num_examples

    print('restricting to examples from classes 0 and 1')
    full_X = full_X[dataset.y_fine < 2, :]

    # update for after restriction
    num_examples = full_X.shape[0]

    assert num_examples > 0

    # The dataset object is only used as a template that is shallow-copied
    # per batch; drop its own data.
    dataset.X = None
    dataset.design_loc = None
    dataset.compress = False

    patchifier = ExtractGridPatches(patch_shape=(size, size),
                                    patch_stride=(1, 1))

    pipeline = serial.load(dataset_descriptor.pipeline_path)

    assert isinstance(pipeline.items[0], ExtractPatches)
    pipeline.items[0] = patchifier

    print('defining features')
    V = T.matrix('V')
    assert V.type.dtype == 'float32'
    model.make_pseudoparams()
    inferred = model.infer(V=V)

    H = inferred['H_hat']
    Mu1 = inferred['S_hat']
    G = inferred['G_hat']
    if len(G) != 1:
        raise NotImplementedError(
            "only supports two layer pd-dbms for now")
    G, = G

    assert H.dtype == 'float32'
    assert Mu1.dtype == 'float32'

    nfeat = model.s3c.nhid + model.dbm.rbms[0].nhid

    if self.feature_type == 'map_hs':
        feat = (H > 0.5) * Mu1
        raise NotImplementedError("doesn't support layer 2")
    elif self.feature_type == 'map_h':
        feat = T.cast(H > 0.5, dtype='float32')
        raise NotImplementedError("doesn't support layer 2")
    elif self.feature_type == 'exp_hs':
        feat = H * Mu1
        raise NotImplementedError("doesn't support layer 2")
    elif self.feature_type == 'exp_hs_split':
        Z = H * Mu1
        pos = T.clip(Z, 0., 1e32)
        neg = T.clip(-Z, 0, 1e32)
        feat = T.concatenate((pos, neg), axis=1)
        nfeat *= 2
        raise NotImplementedError("doesn't support layer 2")
    elif self.feature_type == 'exp_h,exp_g':
        feat = T.concatenate((H, G), axis=1)
    elif self.feature_type == 'exp_h_thresh':
        feat = H * (H > .01)
        raise NotImplementedError("doesn't support layer 2")
    else:
        raise NotImplementedError()

    assert feat.dtype == 'float32'
    print('compiling theano function')
    f = function([V], feat)

    if config.device.startswith('gpu') and nfeat >= 4000:
        f = halver(f, nfeat)

    topo_feat_var = T.TensorType(broadcastable=(False, False, False, False),
                                 dtype='float32')()
    if self.pool_mode == 'mean':
        region_feat_var = topo_feat_var.mean(axis=(1, 2))
    elif self.pool_mode == 'max':
        region_feat_var = topo_feat_var.max(axis=(1, 2))
    else:
        raise ValueError("Unknown pool mode: " + self.pool_mode)
    region_features = function([topo_feat_var], region_feat_var)

    def average_pool(stride):
        # Pools the current `topo_feat` (bound in the batch loop below)
        # into a stride x stride grid of regions.
        def point(p):
            # Floor division: the result is a slice bound and must be an
            # int even when `/` means true division.
            return p * ns // stride

        rval = np.zeros(
            (topo_feat.shape[0], stride, stride, topo_feat.shape[3]),
            dtype='float32')

        for i in xrange(stride):
            for j in xrange(stride):
                rval[:, i, j, :] = region_features(
                    topo_feat[:, point(i):point(i + 1),
                              point(j):point(j + 1), :])

        return rval

    outputs = [
        np.zeros((num_examples, count, count, nfeat), dtype='float32')
        for count in pooling_region_counts
    ]

    assert len(outputs) > 0

    # Template dataset used to reassemble per-patch features.
    fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'),
                           view_converter=DefaultViewConverter(
                               [1, 1, nfeat]))

    ns = 32 - size + 1
    depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                         patch_shape=(1, 1))

    # Diagnostic output when not even one full batch fits.
    if len(range(0, num_examples - batch_size + 1, batch_size)) <= 0:
        print(num_examples)
        print(batch_size)

    for i in xrange(0, num_examples - batch_size + 1, batch_size):
        print(i)

        t1 = time.time()

        batch = copy.copy(dataset)
        batch.set_design_matrix(full_X[i:i + batch_size, :])

        t2 = time.time()

        batch.apply_preprocessor(pipeline, can_fit=False)
        # Same conversion np.cast['float32'] performed; np.cast itself is
        # no longer available in NumPy 2.
        X2 = np.asarray(batch.get_design_matrix()).astype('float32')

        t3 = time.time()

        feat = f(X2)

        t4 = time.time()

        assert feat.dtype == 'float32'

        feat_dataset = copy.copy(fd)

        if np.any(np.isnan(feat)):
            nan += np.isnan(feat).sum()
            feat[np.isnan(feat)] = 0

        feat_dataset.set_design_matrix(feat)

        feat_dataset.apply_preprocessor(depatchifier)

        topo_feat = feat_dataset.get_topological_view()
        assert topo_feat.shape[0] == batch_size

        t5 = time.time()

        # pooling
        for output, count in zip(outputs, pooling_region_counts):
            output[i:i + batch_size, ...] = average_pool(count)

        t6 = time.time()

        # Total time followed by the duration of each stage.
        print((t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5))

    for output, save_path in zip(outputs, self.save_paths):
        np.save(save_path, output)

    if nan > 0:
        warnings.warn(str(nan) + ' features were nan')
# Remaining command-line options; `parser` is created earlier in the script.
parser.add_option('--channels', action='store', type='int', dest='chans')
parser.add_option('--color', action='store_true', dest='color', default=False)
parser.add_option('--layer', action='store', type='int', dest='layer', default=0)
(opts, args) = parser.parse_args()

# One plot per channel, unless --color is given: colour images must have
# exactly three channels and are drawn as a single plot.
nplots = opts.chans
if opts.color:
    assert opts.chans == 3
    nplots = 1

def get_dims(nf):
    """
    Return a (rows, cols) grid for `nf` patches: rows is
    floor(sqrt(nf)) and cols is ceil(nf / rows).
    """
    num_rows = numpy.floor(numpy.sqrt(nf))
    return (int(num_rows), int(numpy.ceil(nf / num_rows)))

# Converter between flattened filters and (height, width, chans) images.
topo_shape = [opts.height, opts.width, opts.chans]
viewconv = DefaultViewConverter(topo_shape)
viewdims = slice(0, None) if opts.color else 0

# load model and retrieve parameters
model = serial.load(opts.path)
# For a TemperedDBN the RBM at index --layer is visualised; any other
# model is used directly.
if isinstance(model, TemperedDBN):
    rbm = model.rbms[opts.layer]
else:
    rbm = model
wv = rbm.Wv.get_value().T

# Add every row of wv to the viewer as one patch.
wv_viewer = PatchViewer(get_dims(len(wv)), (opts.height, opts.width), is_color = opts.color, pad=(2,2))
for i in xrange(len(wv)):
    topo_wvi = viewconv.design_mat_to_topo_view(wv[i:i+1])
    wv_viewer.add_patch(topo_wvi[0])
parser.add_option('--top', action='store', type='int', dest='top', default=5) parser.add_option('--mu', action='store_true', dest='mu', default=False) parser.add_option('--wv_only', action='store_true', dest='wv_only', default=False) (opts, args) = parser.parse_args() nplots = opts.chans if opts.color: assert opts.chans == 3 nplots = 1 def get_dims(nf): num_rows = numpy.floor(numpy.sqrt(nf)) return (int(num_rows), int(numpy.ceil(nf / num_rows))) topo_shape = [opts.height, opts.width, opts.chans] viewconv = DefaultViewConverter(topo_shape) viewdims = slice(0, None) if opts.color else 0 # load model and retrieve parameters model = serial.load(opts.path) wv = model.Wv.get_value().T if opts.mu: wv = wv * model.mu.get_value()[:, None] view1 = PatchViewer(get_dims(len(wv)), (opts.height, opts.width), is_color = opts.color, pad=(2,2)) for i in xrange(len(wv)): topo_wvi = viewconv.design_mat_to_topo_view(wv[i:i+1, :48*48]) view1.add_patch(topo_wvi[0]) view2 = PatchViewer(get_dims(len(wv)), (opts.height, opts.width), is_color = opts.color, pad=(2,2)) for i in xrange(len(wv)):
max_filters = max([len(Wi) for Wi in W]) print 'max_filters = ', max_filters block_viewer = PatchViewer(get_dims(max_filters), (opts.height, opts.width), is_color = opts.color, pad=(2,2)) main_viewer = PatchViewer(get_dims(nblocks), (block_viewer.image.shape[0], block_viewer.image.shape[1]), is_color = opts.color, pad=(5,5)) topo_shape = [opts.height, opts.width, opts.chans] view_converter = DefaultViewConverter(topo_shape) for di, w_di in enumerate(W): if opts.k == -1: # build "new_w" as linear combination of all previous filters if di > 0: new_w = numpy.dot(w_di, prev_w) else: new_w = w_di else: new_w = numpy.zeros((len(w_di), opts.height * opts.width)) if di else w_di for fi in xrange(len(w_di)): if opts.k != -1:
seglen = 30 x = x[:seglen * fs] # make sure format agrees with training data if len(x.shape) != 1: print 'making mono:' x = np.sum(x, axis=1) / 2. # mono if fs != 22050: print 'resampling to 22050 hz:' import scikits.samplerate as samplerate x = samplerate.resample(x, 22050. / fs, 'sinc_best') fs = 22050 if isinstance(input_space, Conv2DSpace): tframes, dim = input_space.shape view_converter = DefaultViewConverter((tframes, dim, 1)) else: dim = input_space.dim tframes = 1 view_converter = None nfft = 2 * (dim - 1) nhop = nfft // 2 nframes = (len(x) - nfft) / nhop x = x[:(nframes - 1) * nhop + nfft] # truncate input to multiple of hopsize # format batches for 1d/2d nets thop = 1. sup = np.arange(0, nframes - tframes + 1, np.int(tframes / thop)) if view_converter:
def __init__(self, config, adv_model, which_set='train'):
    """
    Open the HDF5 data set described by `config` and compile the Theano
    functions of the adversary model.

    Parameters
    ----------
    config : dict
        Must provide 'hdf5' (file path), the partition entries
        `which_set` and `which_set + '_files'`, and 'mean', 'var' and
        'tframes'.
    adv_model
        Model exposing `get_input_space`, `get_output_space`, `fprop`
        and `cost`.
    which_set : str
        One of 'train', 'test' or 'valid'.

    Notes
    -----
    Sets `self.grad`, `self.fprop` and `self.fcost`. For a Conv2DSpace
    input they wrap the compiled functions so that callers can pass
    design-matrix batches; otherwise the compiled functions are used
    directly. The HDF5 file stays open in `self.hdf5`.
    """
    assert which_set in ['train', 'test', 'valid']

    # load hdf5 metadata
    self.hdf5 = tables.open_file(config['hdf5'], mode='r')
    data = self.hdf5.get_node('/', 'Data')
    param = self.hdf5.get_node('/', 'Param')

    self.file_index = param.file_index[0]
    self.file_dict = param.file_dict[0]
    self.label_list = param.label_list[0]
    self.targets = param.targets[0]
    self.nfft = param.fft[0]['nfft']

    # load partition information
    self.support = config[which_set]
    self.file_list = config[which_set + '_files']

    # Mean and variance are stored flattened to 1-D.
    mean = config['mean']
    self.mean = mean.reshape((np.prod(mean.shape), ))
    var = config['var']
    self.var = var.reshape((np.prod(var.shape), ))
    self.istd = np.reciprocal(np.sqrt(self.var))
    self.mask = (self.istd < 20)
    self.tframes = config['tframes']

    # setup adversary
    self.adv_model = adv_model

    in_batch = adv_model.get_input_space().make_theano_batch()
    out_batch = adv_model.get_output_space().make_theano_batch()
    cost = adv_model.cost(out_batch, adv_model.fprop(in_batch))
    dCost = T.grad(cost, in_batch)

    grad_theano = theano.function([in_batch, out_batch], dCost)
    fprop_theano = theano.function([in_batch], adv_model.fprop(in_batch))
    fcost_theano = theano.function([in_batch, out_batch], cost)

    self.input_space = adv_model.get_input_space()

    if not isinstance(self.input_space, Conv2DSpace):
        # Vector input: the compiled functions already accept design
        # matrices.
        self.grad = grad_theano
        self.fprop = fprop_theano
        self.fcost = fcost_theano

        super(AdversaryDataset, self).__init__(X=data.X, y=data.y)
        return

    tframes, dim = self.input_space.shape
    view_converter = DefaultViewConverter((tframes, dim, 1))

    def to_topo(batch):
        # Design-matrix batch -> layout expected by the conv model.
        return view_converter.get_formatted_batch(batch, self.input_space)

    def grad(batch, labels):
        topo_view = grad_theano(to_topo(batch), labels)
        return view_converter.topo_view_to_design_mat(topo_view)

    def fprop(batch):
        return fprop_theano(to_topo(batch))

    def fcost(batch, labels):
        return fcost_theano(to_topo(batch), labels)

    self.grad = grad
    self.fprop = fprop
    self.fcost = fcost

    super(AdversaryDataset, self).__init__(X=data.X, y=data.y,
                                           view_converter=view_converter)
# Remaining command-line options; `parser` is created earlier in the script.
parser.add_option('--top', action='store', type='int', dest='top', default=5)
parser.add_option('--mu', action='store_true', dest='mu', default=False)
parser.add_option('--wv_only', action='store_true', dest='wv_only', default=False)
(opts, args) = parser.parse_args()

# One plot per channel, unless --color is given: colour images must have
# exactly three channels and are drawn as a single plot.
nplots = opts.chans
if opts.color:
    assert opts.chans == 3
    nplots = 1

def get_dims(nf):
    """
    Return a (rows, cols) grid for `nf` patches: rows is
    floor(sqrt(nf)) and cols is ceil(nf / rows).
    """
    num_rows = numpy.floor(numpy.sqrt(nf))
    return (int(num_rows), int(numpy.ceil(nf / num_rows)))

# Converter between flattened filters and (height, width, chans) images.
topo_shape = [opts.height, opts.width, opts.chans]
viewconv = DefaultViewConverter(topo_shape)
viewdims = slice(0, None) if opts.color else 0

# load model and retrieve parameters
model = serial.load(opts.path)
wv = model.Wv.get_value().T
# With --mu every row of wv is scaled by the matching entry of model.mu.
if opts.mu:
    wv = wv * model.mu.get_value()[:, None]

# Add every row of wv to the viewer as one patch.
wv_viewer = PatchViewer(get_dims(len(wv)), (opts.height, opts.width), is_color = opts.color, pad=(2,2))
for i in xrange(len(wv)):
    topo_wvi = viewconv.design_mat_to_topo_view(wv[i:i+1])
    wv_viewer.add_patch(topo_wvi[0])
# With --wv_only the viewer is displayed right away.
if opts.wv_only:
    wv_viewer.show()
class myDenseDesignMatrix(dense_design_matrix.DenseDesignMatrix):
    """
    DenseDesignMatrix variant that carries a third data source,
    'latents', next to 'features' and 'targets'.

    Whenever targets (`y`) are present, `latent` must be an array as
    well: its last dimension defines the latent space and it is sliced
    together with `X` and `y`.
    """

    _default_seed = (17, 2, 946)

    def __init__(self, X=None, topo_view=None, y=None, latent = None,
                 view_converter=None, axes=('b', 0, 1, 'c'),
                 rng=_default_seed, preprocessor=None,
                 fit_preprocessor=False, X_labels=None, y_labels=None):
        """
        Parameters
        ----------
        X : ndarray, optional
            Design matrix. Required unless `topo_view` is given.
        topo_view : ndarray, optional
            Batch of topological views; mutually exclusive with
            `view_converter`.
        y : ndarray, optional
            Targets.
        latent : ndarray, optional
            Latent values; required whenever `y` is given.
        view_converter : object, optional
            Converter exposing `topo_space`.
        axes : tuple
            Axis order of `topo_view`.
        rng : object
            Seed or generator passed to `make_np_rng`.
        preprocessor : object, optional
            If given, applied to this dataset at construction time.
        fit_preprocessor : bool
            Passed to the preprocessor as `can_fit`.
        X_labels, y_labels : int, optional
            When set, the corresponding source is described by an
            IndexSpace with that many labels instead of a VectorSpace.
        """
        self.latent = latent
        self.X = X
        self.y = y
        self.view_converter = view_converter
        self.X_labels = X_labels
        self.y_labels = y_labels

        self._check_labels()

        if topo_view is not None:
            assert view_converter is None
            self.set_topological_view(topo_view, axes)
        else:
            assert X is not None, ("DenseDesignMatrix needs to be provided "
                                   "with either topo_view, or X")
            if view_converter is not None:

                # Get the topo_space (usually Conv2DSpace) from the
                # view_converter
                if not hasattr(view_converter, 'topo_space'):
                    raise NotImplementedError("Not able to get a topo_space "
                                              "from this converter: %s"
                                              % view_converter)

                # self.X_topo_space stores a "default" topological space that
                # will be used only when self.iterator is called without a
                # data_specs, and with "topo=True", which is deprecated.
                self.X_topo_space = view_converter.topo_space
            else:
                self.X_topo_space = None

            # Update data specs, if not done in set_topological_view
            X_source = 'features'
            if X_labels is None:
                X_space = VectorSpace(dim=X.shape[1])
            else:
                if X.ndim == 1:
                    dim = 1
                else:
                    dim = X.shape[-1]
                X_space = IndexSpace(dim=dim, max_labels=X_labels)
            if y is None:
                space = X_space
                source = X_source
            else:
                if y.ndim == 1:
                    dim = 1
                else:
                    dim = y.shape[-1]
                if y_labels is not None:
                    y_space = IndexSpace(dim=dim, max_labels=y_labels)
                else:
                    y_space = VectorSpace(dim=dim)
                y_source = 'targets'

                Latent_space = VectorSpace(dim=latent.shape[-1])
                Latent_source = 'latents'
                space = CompositeSpace((X_space, y_space, Latent_space))
                source = (X_source, y_source, Latent_source)

            self.data_specs = (space, source)
            self.X_space = X_space

        self.compress = False
        self.design_loc = None
        self.rng = make_np_rng(rng, which_method="random_integers")
        # Defaults for iterators
        self._iter_mode = resolve_iterator_class('sequential')
        self._iter_topo = False
        self._iter_targets = False
        self._iter_data_specs = (self.X_space, 'features')

        if preprocessor:
            preprocessor.apply(self, can_fit=fit_preprocessor)
        self.preprocessor = preprocessor

    def get_data(self):
        """
        Returns all the data, as it is internally stored.
        The definition and format of these data are described in
        `self.get_data_specs()`.

        Returns
        -------
        data : numpy matrix or 3-tuple of matrices
            `self.X` when there are no targets, otherwise
            `(self.X, self.y, self.latent)`.
        """
        if self.y is None:
            return self.X
        else:
            return (self.X, self.y, self.latent)

    def set_topological_view(self, V, axes=('b', 0, 1, 'c')):
        """
        Sets the dataset to represent V, where V is a batch
        of topological views of examples.

        Parameters
        ----------
        V : ndarray
            Batch of examples in topological layout; must not contain
            NaN.
        axes : tuple
            Axis order of `V`; must contain 0, 1 and 'c'.
        """
        assert not contains_nan(V)
        rows = V.shape[axes.index(0)]
        cols = V.shape[axes.index(1)]
        channels = V.shape[axes.index('c')]
        self.view_converter = DefaultViewConverter([rows, cols, channels],
                                                   axes=axes)
        self.X = self.view_converter.topo_view_to_design_mat(V)
        # self.X_topo_space stores a "default" topological space that
        # will be used only when self.iterator is called without a
        # data_specs, and with "topo=True", which is deprecated.
        self.X_topo_space = self.view_converter.topo_space
        assert not contains_nan(self.X)

        # Update data specs
        X_space = VectorSpace(dim=self.X.shape[1])
        X_source = 'features'
        if self.y is None:
            space = X_space
            source = X_source
        else:
            if self.y.ndim == 1:
                dim = 1
            else:
                dim = self.y.shape[-1]
            # This is to support old pickled models
            if getattr(self, 'y_labels', None) is not None:
                y_space = IndexSpace(dim=dim, max_labels=self.y_labels)
            elif getattr(self, 'max_labels', None) is not None:
                y_space = IndexSpace(dim=dim, max_labels=self.max_labels)
            else:
                y_space = VectorSpace(dim=dim)
            y_source = 'targets'

            Latent_space = VectorSpace(dim=self.latent.shape[-1])
            Latent_source = 'latents'

            space = CompositeSpace((X_space, y_space, Latent_space))
            source = (X_source, y_source, Latent_source)

        self.data_specs = (space, source)
        self.X_space = X_space
        self._iter_data_specs = (X_space, X_source)

    def get_targets(self):
        """
        Return the targets (`self.y`).
        """
        return self.y

    def get_latents(self):
        """
        Return the latent values (`self.latent`).
        """
        return self.latent

    def get_batch_design(self, batch_size, include_labels=False):
        """
        Return a random contiguous batch of design-matrix rows.

        Parameters
        ----------
        batch_size : int
            Number of consecutive rows to return.
        include_labels : bool
            If true, the targets and latents of the same rows are
            returned too.

        Returns
        -------
        rx : ndarray
            When `include_labels` is false; cast to `config.floatX`.
        (rx, None)
            When `include_labels` is true and the dataset has no targets.
        (rx, ry, rlatent)
            When `include_labels` is true and targets exist. `rx` is not
            cast in the two tuple cases.

        Raises
        ------
        ValueError
            If `batch_size` exceeds the number of examples.
        """
        try:
            idx = self.rng.randint(self.X.shape[0] - batch_size + 1)
        except ValueError:
            if batch_size > self.X.shape[0]:
                reraise_as(ValueError("Requested %d examples from a dataset "
                                      "containing only %d." %
                                      (batch_size, self.X.shape[0])))
            raise
        rx = self.X[idx:idx + batch_size, :]
        if include_labels:
            if self.y is None:
                return rx, None
            ry = self.y[idx:idx + batch_size]
            rlatent = self.latent[idx:idx + batch_size]
            return rx, ry, rlatent
        # Same conversion np.cast[config.floatX] performed; np.cast itself
        # is no longer available in NumPy 2.
        rx = np.asarray(rx).astype(config.floatX)
        return rx

    def get_batch_topo(self, batch_size, include_labels=False):
        """
        Return a random contiguous batch in topological layout.

        Parameters
        ----------
        batch_size : int
            Number of examples to return.
        include_labels : bool
            If true, return `(batch, labels, latents)` instead of just
            the batch; `labels` and `latents` are None when the dataset
            has no targets.
        """
        if include_labels:
            # get_batch_design returns only (rx, None) when there are no
            # targets; unpacking three values from it used to raise
            # ValueError.
            result = self.get_batch_design(batch_size, True)
            batch_design, labels = result[0], result[1]
            latents = result[2] if len(result) > 2 else None
        else:
            batch_design = self.get_batch_design(batch_size)

        rval = self.view_converter.design_mat_to_topo_view(batch_design)

        if include_labels:
            return rval, labels, latents

        return rval
def _execute(self):
    """
    Extract mean-pooled soft-threshold features for one dataset split
    and save them to disk.

    Steps, all driven by attributes of `self`:

    1. Build the dataset selected by `dataset_family[which_set][size]`;
       when `restrict` is set keep only rows
       `restrict[0]:restrict[1]`.
    2. Load the stored preprocessing pipeline and replace its first item
       with a dense (stride 1) grid-patch extractor of `size` x `size`.
    3. Compile a Theano function computing `Z = V . W` followed by
       thresholding at `alpha`: `max(|Z|, alpha) - alpha` when
       `one_sided`, otherwise the positive and negative parts
       concatenated.
    4. For each full batch: extract patches, compute features, zero out
       NaNs (counting them), reassemble the per-patch features into a
       topological map and mean-pool it into `count` x `count` regions
       for every entry of `pooling_region_counts`.
    5. Save one array per pooling count to `self.save_paths`; when
       `chunk_size` is set, '_<letter>' (A + `chunk_id`) is inserted
       before the '.npy' suffix. Warn if any NaN was seen.

    Notes
    -----
    Only full batches are processed; rows of the outputs belonging to a
    trailing partial batch stay zero.
    `ns = 32 - size + 1` hard-codes a 32-pixel image side
    (NOTE(review): confirm against the datasets in use).
    """
    batch_size = self.batch_size
    pooling_region_counts = self.pooling_region_counts
    dataset_family = self.dataset_family
    which_set = self.which_set
    size = self.size

    nan = 0

    dataset_descriptor = dataset_family[which_set][size]

    dataset = dataset_descriptor.dataset_maker()
    expected_num_examples = dataset_descriptor.num_examples

    full_X = dataset.get_design_matrix()
    num_examples = full_X.shape[0]
    assert num_examples == expected_num_examples

    if self.restrict is not None:
        assert self.restrict[1] <= full_X.shape[0]

        # Same text the former multi-argument print statement produced
        # (items separated by single spaces).
        print(' '.join(['restricting to examples ', str(self.restrict[0]),
                        ' through ', str(self.restrict[1]), ' exclusive']))
        full_X = full_X[self.restrict[0]:self.restrict[1], :]

        assert self.restrict[1] > self.restrict[0]

    # update for after restriction
    num_examples = full_X.shape[0]

    assert num_examples > 0

    # The dataset object is only used as a template that is shallow-copied
    # per batch; drop its own data.
    dataset.X = None
    dataset.design_loc = None
    dataset.compress = False

    patchifier = ExtractGridPatches(patch_shape=(size, size),
                                    patch_stride=(1, 1))

    pipeline = serial.load(dataset_descriptor.pipeline_path)

    assert isinstance(pipeline.items[0], ExtractPatches)
    pipeline.items[0] = patchifier

    print('defining features')
    V = T.matrix('V')
    Z = T.dot(V, self.W)

    alpha = self.alpha

    if self.one_sided:
        feat = T.clip(abs(Z), alpha, 1e30) - alpha
    else:
        pos = T.clip(Z, alpha, 1e30) - alpha
        neg = T.clip(-Z, alpha, 1e30) - alpha
        feat = T.concatenate((pos, neg), axis=1)

    assert feat.dtype == 'float32'
    print('compiling theano function')
    f = function([V], feat)

    # Two-sided features have twice as many columns as W.
    nfeat = self.W.get_value().shape[1] * (2 - self.one_sided)

    if config.device.startswith('gpu') and nfeat >= 4000:
        f = halver(f, nfeat)

    topo_feat_var = T.TensorType(broadcastable=(False, False, False, False),
                                 dtype='float32')()
    region_features = function([topo_feat_var],
                               topo_feat_var.mean(axis=(1, 2)))

    def average_pool(stride):
        # Pools the current `topo_feat` (bound in the batch loop below)
        # into a stride x stride grid of regions.
        def point(p):
            # Floor division: the result is a slice bound and must be an
            # int even when `/` means true division.
            return p * ns // stride

        rval = np.zeros(
            (topo_feat.shape[0], stride, stride, topo_feat.shape[3]),
            dtype='float32')

        for i in xrange(stride):
            for j in xrange(stride):
                rval[:, i, j, :] = region_features(
                    topo_feat[:, point(i):point(i + 1),
                              point(j):point(j + 1), :])

        return rval

    outputs = [
        np.zeros((num_examples, count, count, nfeat), dtype='float32')
        for count in pooling_region_counts
    ]

    assert len(outputs) > 0

    # Template dataset used to reassemble per-patch features.
    fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'),
                           view_converter=DefaultViewConverter(
                               [1, 1, nfeat]))

    ns = 32 - size + 1
    depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                         patch_shape=(1, 1))

    # Diagnostic output when not even one full batch fits.
    if len(range(0, num_examples - batch_size + 1, batch_size)) <= 0:
        print(num_examples)
        print(batch_size)

    for i in xrange(0, num_examples - batch_size + 1, batch_size):
        print(i)

        t1 = time.time()

        batch = copy.copy(dataset)
        batch.set_design_matrix(full_X[i:i + batch_size, :])

        t2 = time.time()

        batch.apply_preprocessor(pipeline, can_fit=False)
        X2 = batch.get_design_matrix()

        t3 = time.time()

        feat = f(X2)

        t4 = time.time()

        assert feat.dtype == 'float32'

        feat_dataset = copy.copy(fd)

        if np.any(np.isnan(feat)):
            nan += np.isnan(feat).sum()
            feat[np.isnan(feat)] = 0

        feat_dataset.set_design_matrix(feat)

        feat_dataset.apply_preprocessor(depatchifier)

        topo_feat = feat_dataset.get_topological_view()
        assert topo_feat.shape[0] == batch_size

        t5 = time.time()

        # average pooling
        for output, count in zip(outputs, pooling_region_counts):
            output[i:i + batch_size, ...] = average_pool(count)

        t6 = time.time()

        # Total time followed by the duration of each stage.
        print((t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5))

    for output, save_path in zip(outputs, self.save_paths):
        if self.chunk_size is not None:
            # 'name.npy' -> 'name_<letter>.npy'
            assert save_path.endswith('.npy')
            save_path_pieces = save_path.split('.npy')
            assert len(save_path_pieces) == 2
            assert save_path_pieces[1] == ''
            save_path = (save_path_pieces[0] + '_' +
                         chr(ord('A') + self.chunk_id) + '.npy')
        np.save(save_path, output)

    if nan > 0:
        warnings.warn(str(nan) + ' features were nan')
def make_viewer(mat, grid_shape=None, patch_shape=None,
                activation=None, pad=None, is_color=False, rescale=True):
    """
    Build a PatchViewer showing every patch stored in `mat`.

    Parameters
    ----------
    mat : ndarray
        Either a matrix with one flattened patch per row, or a 4D tensor
        in ('b', 0, 1, 'c') format. A matrix is assumed to have been
        flattened the way a ('b', 0, 1, 'c') DefaultViewConverter does
        it. Values should lie in [-1, 1] if `rescale` is False; 0.
        always means medium gray, negative values are drawn darker and
        positive values brighter.
    grid_shape : tuple, optional
        Two ints, (rows, cols), giving the shape of the grid in the
        PatchViewer. Chosen with `PatchViewer.pick_shape` when omitted.
    patch_shape : tuple, optional
        Two ints giving the shape of one patch. Taken from `mat` itself
        when `mat` is 4D. When `mat` is 2D and no shape is given, it is
        picked with `PatchViewer.pick_shape(..., exact=True)`.
    activation : iterable, optional
        Activation values associated with the patches, drawn as borders
        around them. Either one value per patch, or several collections
        of per-patch values to draw several borders.
    pad : int, optional
        Padding between patches in the displayed image.
    is_color : bool
        If True, a 2D `mat` is interpreted as having 3 channels. Not
        needed for a 4D `mat`, where the last axis gives the channels.
    rescale : bool
        Forwarded to `PatchViewer.add_patch` for every patch.

    Returns
    -------
    patch_viewer : PatchViewer
        A PatchViewer containing the patches stored in `mat`.
    """
    num_patches = mat.shape[0]

    if grid_shape is None:
        grid_shape = PatchViewer.pick_shape(num_patches)

    if mat.ndim > 2:
        # Already topological: shape and channel count come from `mat`.
        topo_view = mat
        patch_shape = mat.shape[1:3]
        num_channels = mat.shape[3]
        is_color = num_channels > 1
    else:
        num_channels = 3 if is_color else 1
        if patch_shape is None:
            assert mat.shape[1] % num_channels == 0
            pixels_per_channel = mat.shape[1] // num_channels
            patch_shape = PatchViewer.pick_shape(pixels_per_channel,
                                                 exact=True)
            assert mat.shape[1] == (patch_shape[0] *
                                    patch_shape[1] *
                                    num_channels)
        converter = DefaultViewConverter(
            (patch_shape[0], patch_shape[1], num_channels))
        topo_view = converter.design_mat_to_topo_view(mat)

    viewer = PatchViewer(grid_shape, patch_shape, pad=pad,
                         is_color=is_color)

    for idx in xrange(num_patches):
        if activation is None:
            act = None
        elif hasattr(activation[0], '__iter__'):
            # Several activation collections: one border per collection.
            act = [values[idx] for values in activation]
        else:
            act = activation[idx]
        viewer.add_patch(topo_view[idx, :], rescale=rescale,
                         activation=act)

    return viewer
def test_init_with_vc():
    """
    Check that a DenseDesignMatrix can be constructed from a design
    matrix together with an explicit view converter.
    """
    rng = np.random.RandomState([4, 5, 6])
    design_matrix = rng.randn(12, 5)
    converter = DefaultViewConverter([1, 2, 3])
    d = DenseDesignMatrix(X=design_matrix, view_converter=converter)
def __init__(self, which_set,
             base_path='${PYLEARN2_DATA_PATH}/icml_2013_emotions',
             start=None,
             stop=None,
             preprocessor=None,
             fit_preprocessor=False,
             axes=('b', 0, 1, 'c'),
             fit_test_preprocessor=False):
    """
    Load one portion of the emotions dataset.

    Parameters
    ----------
    which_set : str
        Which portion of the dataset to load: 'train' or 'public_test'.
    base_path : str
        Directory containing the .csv files from kaggle.com. It should
        be writable: if the .csv files have not been converted to npy
        yet, they are converted to save memory on later loads.
    start, stop : int, optional
        When `start` is given, only rows ``start:stop`` are kept (and
        `stop` must then be given as well).
    preprocessor : object, optional
        If given, applied to the dataset once it is constructed.
    fit_preprocessor : bool
        True if the preprocessor is allowed to fit the data.
    axes : tuple
        Axis order handed to the view converter.
    fit_test_preprocessor : bool
        Stored as `fit_preprocessor` in the arguments recorded for
        building a test set from this dataset.
    """
    # Snapshot the constructor arguments first, before any other local
    # exists, then adapt the snapshot to describe the test set.
    self.test_args = locals()
    recorded = self.test_args
    recorded['which_set'] = 'public_test'
    recorded['fit_preprocessor'] = fit_test_preprocessor
    for dropped in ('start', 'stop', 'self'):
        del recorded[dropped]

    files = {'train': 'train.csv', 'public_test': 'test.csv'}

    try:
        filename = files[which_set]
    except KeyError:
        raise ValueError("Unrecognized dataset name: " + which_set)

    path = preprocess(base_path + '/' + filename)

    # Second argument: whether the file is the training set.
    X, y = self._load_data(path, which_set == 'train')

    if start is not None:
        # NOTE(review): 'test' is not one of the accepted names above,
        # so this check looks like it can never fail -- confirm intent.
        assert which_set != 'test'
        assert isinstance(start, int)
        assert isinstance(stop, int)
        assert 0 <= start < stop <= X.shape[0]
        X = X[start:stop, :]
        if y is not None:
            y = y[start:stop, :]

    view_converter = DefaultViewConverter(shape=[48, 48, 1], axes=axes)

    super(EmotionsDataset, self).__init__(X=X, y=y, y_labels=7,
                                          view_converter=view_converter)

    if preprocessor:
        preprocessor.apply(self, can_fit=fit_preprocessor)
# check for global scaling if not opts.local: samples = samples / numpy.abs(samples).max() ############## # PLOT FILTERS ############## import pdb; pdb.set_trace() viewer = PatchViewer(get_dims(model.batch_size), (opts.height, opts.width), is_color = opts.color, pad=(2,2)) topo_shape = [opts.height, opts.width, opts.chans] view_converter = DefaultViewConverter(topo_shape) topo_view = view_converter.design_mat_to_topo_view(samples) for chan_i in xrange(nplots): topo_chan = topo_view if opts.color else topo_view[..., chan_i:chan_i+1] for bi in xrange(model.batch_size): viewer.add_patch(topo_chan[bi]) #pl.subplot(1, nplots, chan_i+1) #pl.imshow(viewer.image, interpolation=None) #pl.axis('off'); pl.title('samples (channel %i)' % chan_i) viewer.show()
quit(-1) models = [] try: for model in serial.load(model_path): models.append(model) except Exception as e: usage() print(model_path + "doesn't seem to be a valid model path, I got this error when trying to load it: ") print(e) # load the test set with open('preprocessed_test_for_pylearn2.pkl') as f: dataset = pkl.load(f) dataset = DenseDesignMatrix(X=dataset, view_converter=DefaultViewConverter(shape=[32, 32, 1], axes=['b', 0, 1, 'c'])) print(models) predictions = [] print(len(models)) for model in models: print(model) model.set_batch_size(dataset.X.shape[0]) X = model.get_input_space().make_batch_theano() Y = model.fprop(X) # forward prop the test data y = T.argmax(Y, axis=1)
class TemporalDenseDesignMatrix(DenseDesignMatrix):
    '''
    A class for representing datasets that can be stored as a dense design
    matrix, but whose examples are slices of width >= 2 rows each.

    The design matrix is kept in `self._X`; `self.X` is set to None after
    construction so that other code cannot access it directly. `iterator`
    re-exposes it only while the parent iterator is being built.
    '''
    _default_seed = (17, 2, 946)

    def __init__(self, X=None, topo_view=None, y=None,
                 view_converter=None, axes=('b', 0, 1, 2, 'c'),
                 rng=_default_seed, preprocessor=None,
                 fit_preprocessor=False):
        '''
        Parameters
        ----------
        X : ndarray, 2-dimensional, optional
            Forwarded to the parent constructor unchanged. `topo_view`
            is mandatory for this class, so X alone is not sufficient.
        topo_view : ndarray
            Required. An array whose first dimension is of length number
            examples. The remaining dimensions are examples with
            topological significance, e.g. for images the remaining axes
            are rows, columns, and channels.
            TODO: time is 0, ii is 1, jj is 2
        y : ndarray, 1-dimensional(?), optional
            Labels or targets for each example. The semantics here are
            not quite nailed down for this yet.
        view_converter : object, optional
            An object for converting between the design matrix stored
            internally and the data that will be returned by iterators.
        axes : tuple
            Must be ('b', 0, 1, 2, 'c'). The parent constructor is given
            ('b', 0, 1, 'c') instead.
        rng : object, optional
            A random number generator used for picking random indices
            into the design matrix when choosing minibatches.
        preprocessor, fit_preprocessor
            Forwarded to the parent constructor unchanged.
        '''
        assert topo_view is not None, (
            'For TemporalDenseDesignMatrix, must provide topo_view (not X)'
        )
        assert axes == ('b', 0, 1, 2, 'c')

        reduced_axes = ('b', 0, 1, 'c')

        super(TemporalDenseDesignMatrix, self).__init__(
            X=X,
            topo_view=topo_view,
            y=y,
            view_converter=view_converter,
            axes=reduced_axes,
            rng=rng,
            preprocessor=preprocessor,
            fit_preprocessor=fit_preprocessor
        )

        self._X = self.X
        self.X = None  # prevent other access

    def set_topological_view(self, topo_view, axes=('b', 0, 1, 'c')):
        '''
        Sets the dataset to represent topo_view, where topo_view is a batch
        of topological views of examples.

        Parameters
        ----------
        topo_view : ndarray
            A batch of topological views; the 'b' axis is interpreted as
            frames. Must not contain NaN.
        axes : tuple
            Axis order of `topo_view`.
        '''
        assert not np.any(np.isnan(topo_view))
        # pretend frames come in as batch dim
        frames = topo_view.shape[axes.index('b')]
        rows = topo_view.shape[axes.index(0)]
        cols = topo_view.shape[axes.index(1)]
        channels = topo_view.shape[axes.index('c')]

        # leave out frames...
        self.view_converter = DefaultViewConverter([rows, cols, channels],
                                                   axes=axes)

        self.X = self.view_converter.topo_view_to_design_mat(topo_view)
        # self.X_topo_space stores a "default" topological space that
        # will be used only when self.iterator is called without a
        # data_specs, and with "topo=True", which is deprecated.
        self.X_topo_space = self.view_converter.topo_space
        assert not np.any(np.isnan(self.X))

        # Update data specs
        X_space = VectorSpace(dim=frames * rows * cols * channels)
        X_source = 'features'

        assert self.y is None, 'y not supported now'
        space = X_space
        source = X_source

        self.data_specs = (space, source)
        self.X_space = X_space
        self._iter_data_specs = (X_space, X_source)

    @functools.wraps(Dataset.iterator)
    def iterator(self, mode=None, batch_size=None, num_batches=None,
                 topo=None, targets=None, rng=None, data_specs=None,
                 return_tuple=False):
        '''
        Thin wrapper around the parent iterator.

        The parent iterator is asked for 'random_slice' batches of
        `num_frames` rows each (one example at a time); the result is
        wrapped in a CopyingConcatenatingIterator that groups
        `batch_size` of them.
        '''
        assert mode == 'shuffled_sequential', (
            'Only shuffled_sequential mode is supported'
        )
        assert data_specs is not None, 'Must provide data_specs'
        assert len(data_specs) == 2, \
            'data_specs must include only one tuple for "features"'
        assert type(data_specs[0]) is CompositeSpace, \
            'must be composite space...??'
        assert data_specs[0].num_components == 1, \
            'must only have one component, features'
        assert data_specs[1][0] == 'features', \
            'data_specs must include only one tuple for "features"'

        output_space = data_specs[0].components[0]
        num_frames = output_space.shape[0]

        if num_batches is None:
            # another hack... just determines how often new iterators
            # will be created?
            num_batches = 10

        # Iterates through ONE example at a time
        base_num_batches = num_batches * batch_size

        # BEGIN HUGE HACK (enable self.X access just for this function).
        # try/finally guarantees X is hidden again even if the parent
        # iterator raises.
        self.X = self._X
        try:
            base_iterator = super(TemporalDenseDesignMatrix, self).iterator(
                mode='random_slice',  # to return contiguous bits
                batch_size=num_frames,
                num_batches=base_num_batches,
                topo=topo,
                targets=targets,
                rng=rng,
                data_specs=data_specs,
                return_tuple=False)
        finally:
            self.X = None
        # END HUGE HACK

        return CopyingConcatenatingIterator(base_iterator,
                                            how_many=batch_size)
class FaceBBoxDDMPytables(dense_design_matrix.DenseDesignMatrix):
    """
    DenseDesignMatrix based on PyTables for face bounding boxes.
    """
    # Compression settings shared by every array/table created below.
    filters = tables.Filters(complib='blosc', complevel=1)
    # Class-level fallback handle; set by fill_hdf5/resize on first use.
    h5file = None

    def __init__(self, X=None, h5file=None, topo_view=None, y=None,
                 view_converter=None, axes=('b', 0, 1, 'c'),
                 image_shape=None, receptive_field_shape=None,
                 bbox_conversion_type=ConversionType.GUID,
                 area_ratio=None,
                 stride=None, use_output_map=True, rng=None):
        """
        Parameters
        ----------
        X : ndarray, 2-dimensional, optional
            Should be supplied if `topo_view` is not. A design matrix
            of shape (number examples, number features) that defines
            the dataset.
        h5file : object, optional
            The open PyTables file the data is written to.
        topo_view : ndarray, optional
            Should be supplied if X is not. An array whose first
            dimension is of length number examples. The remaining
            dimensions are examples with topological significance,
            e.g. for images the remaining axes are rows, columns,
            and channels.
        y : ndarray, 1-dimensional(?), optional
            Labels or targets for each example. The semantics here
            are not quite nailed down for this yet.
        view_converter : object, optional
            An object for converting between design matrices and
            topological views.
        image_shape : list
            Shape of the images that we are processing. Required.
        receptive_field_shape : list
            Size of the receptive field of the convolutional neural
            network. Required.
        bbox_conversion_type, area_ratio, use_output_map
            Stored and handed to the iterator unchanged.
        stride : integer
            The stride that we have used for the convolution operation.
            Required.
        rng : object, optional
            A random number generator used for picking random
            indices into the design matrix when choosing minibatches.
            Defaults to the seed (17, 2, 946).
        """
        if rng is None:
            rng = (17, 2, 946)

        assert image_shape is not None
        assert receptive_field_shape is not None
        assert stride is not None

        self.image_shape = image_shape
        self.receptive_field_shape = receptive_field_shape
        self.stride = stride
        self.use_output_map = use_output_map
        self.bbox_conversion_type = bbox_conversion_type
        self.h5file = h5file
        self.area_ratio = area_ratio
        self._deprecated_interface = True

        FaceBBoxDDMPytables.filters = tables.Filters(complib='blosc',
                                                     complevel=1)

        super(FaceBBoxDDMPytables, self).__init__(
            X=X,
            topo_view=topo_view,
            y=y,
            view_converter=view_converter,
            axes=axes,
            rng=rng)

    def set_design_matrix(self, X, start=0):
        """
        Write a design matrix into the HDF5 file.

        Parameters
        ----------
        X : ndarray, 2-dimensional
            Images, one per row. Must not contain NaN.
        start : int
            Row of the on-disk array at which writing starts.

        Raises
        ------
        ValueError
            If the file is closed or not opened in "w" or "r+" mode.
        """
        assert (len(X.shape) == 2)
        assert self.h5file is not None
        assert not numpy.any(numpy.isnan(X))

        if self.h5file.isopen and (self.h5file.mode == "w" or
                                   self.h5file.mode == "r+"):
            self.fill_hdf5(h5file=self.h5file, data_x=X, start=start)
        else:
            raise ValueError("H5File is not open or not in the writable "
                             "mode!")

    def set_topological_view(self, V, axes=('b', 0, 1, 'c'), start=0):
        """
        Sets the dataset to represent V, where V is a batch
        of topological views of examples.

        Parameters
        ----------
        V : ndarray
            A batch of topological views. Must not contain NaN.
        axes : tuple
            Axis order of `V`.
        start : int
            Row of the on-disk array at which writing starts.
        """
        assert not numpy.any(numpy.isnan(V))
        rows = V.shape[axes.index(0)]
        cols = V.shape[axes.index(1)]
        channels = V.shape[axes.index('c')]
        self.view_converter = DefaultViewConverter([rows, cols, channels],
                                                   axes=axes)
        X = self.view_converter.topo_view_to_design_mat(V)
        assert not numpy.any(numpy.isnan(X))
        FaceBBoxDDMPytables.fill_hdf5(h5file=self.h5file,
                                      data_x=X,
                                      start=start)

    @functools.wraps(Dataset.iterator)
    def iterator(self, mode=None, batch_size=None, num_batches=None,
                 topo=None, targets=None, rng=None, data_specs=None,
                 return_tuple=False):
        # build data_specs from topo and targets if needed
        if topo is None:
            topo = getattr(self, '_iter_topo', False)
            # data_specs may legitimately be None here; only inspect it
            # when it was actually provided.
            if data_specs is not None and data_specs[0] is not None:
                if (isinstance(data_specs[0], Conv2DSpace) or
                        isinstance(data_specs[0].components[0],
                                   Conv2DSpace)):
                    topo = True

        if topo:
            # self.iterator is called without a data_specs, and with
            # "topo=True", so we use the default topological space
            # stored in self.X_topo_space
            assert self.X_topo_space is not None
            X_space = self.X_topo_space
        else:
            X_space = self.X_space

        if targets is None:
            targets = (data_specs is not None and
                       "targets" in data_specs[1])

        if data_specs is None:
            if targets:
                assert self.y is not None
                # The caller gave no data_specs, so the target space has
                # to come from the dataset's own specs.
                y_space = self.data_specs[0].components[1]
                space = CompositeSpace(components=(X_space, y_space))
                source = ('features', 'targets')
            else:
                space = X_space
                source = 'features'

            print(space)
            data_specs = (space, source)

        # TODO: Refactor
        if mode is None:
            if hasattr(self, '_iter_subset_class'):
                mode = self._iter_subset_class
            else:
                raise ValueError('iteration mode not provided and no default '
                                 'mode set for %s' % str(self))
        else:
            mode = resolve_iterator_class(mode)

        if batch_size is None:
            batch_size = getattr(self, '_iter_batch_size', None)
        if num_batches is None:
            num_batches = getattr(self, '_iter_num_batches', None)
        if rng is None and mode.stochastic:
            rng = self.rng
        if data_specs is None:
            data_specs = self._iter_data_specs

        return FaceBBoxDDMIterator(
            self,
            mode(self.X.shape[0], batch_size, num_batches, rng),
            img_shape=self.image_shape,
            receptive_field_shape=self.receptive_field_shape,
            stride=self.stride,
            bbox_conversion_type=self.bbox_conversion_type,
            topo=topo,
            targets=targets,
            area_ratio=self.area_ratio,
            use_output_map=self.use_output_map,
            data_specs=data_specs,
            return_tuple=return_tuple)

    @staticmethod
    def init_hdf5(path=None, shapes=None):
        """
        Initialize hdf5 file to be used as a dataset.

        Parameters
        ----------
        path : str, optional
            File to create (opened in "w" mode). When None, the file
            handle stored on the class is reused.
        shapes : tuple
            (x_shape, y_shape); only x_shape is used, for the 'X' array.

        Returns
        -------
        h5file, gcolumns
            The file handle and the newly created "Data" group.

        Raises
        ------
        ValueError
            If `path` is None and no class-level file handle is set.
        """
        assert shapes is not None
        x_shape, y_shape = shapes
        print("init_hdf5")

        # make pytables
        if path is None:
            if FaceBBoxDDMPytables.h5file is None:
                raise ValueError("path variable should not be empty.")
            else:
                h5file = FaceBBoxDDMPytables.h5file
        else:
            h5file = tables.openFile(
                path, mode="w",
                title="Google Face bounding boxes Dataset.")

        gcolumns = h5file.createGroup(h5file.root, "Data", "Data")
        atom = (tables.Float32Atom() if config.floatX == 'float32'
                else tables.Float64Atom())
        filters = FaceBBoxDDMPytables.filters

        h5file.createCArray(gcolumns, 'X', atom=atom, shape=x_shape,
                            title="Images", filters=filters)

        h5file.createTable(gcolumns, 'bboxes', BoundingBox,
                           title="Face bounding boxes",
                           filters=filters)

        return h5file, gcolumns

    @staticmethod
    def fill_hdf5(h5file, data_x, data_y=None, node=None, start=0,
                  batch_size=5000):
        """
        PyTables tends to crash if you write large data on them at once.
        This function writes data to the file in batches.

        Parameters
        ----------
        h5file : object
            Open PyTables file; also stored on the class if the class
            has no file handle yet.
        data_x : ndarray
            Rows to write into ``node.X``.
        data_y : ndarray, optional
            Rows to write into ``node.y``.
        node : object, optional
            Group to write into; defaults to ``h5file.root.Data``.
        start : int
            The start index to write data.
        batch_size : int
            Number of rows written per batch.
        """
        if node is None:
            node = h5file.root.Data
        if FaceBBoxDDMPytables.h5file is None:
            FaceBBoxDDMPytables.h5file = h5file

        data_size = data_x.shape[0]
        for i in xrange(0, data_size, batch_size):
            # The last batch may be shorter than batch_size.
            stop = min(i + batch_size, data_size)
            assert (start + stop) <= (node.X.shape[0])
            node.X[start + i: start + stop, :] = data_x[i:stop, :]
            if data_y is not None:
                # NOTE(review): init_hdf5/resize only create 'X' and
                # 'bboxes'; confirm that callers passing data_y supply a
                # node that really has a 'y' child.
                node.y[start + i: start + stop, :] = data_y[i:stop, :]
            h5file.flush()

    @staticmethod
    def resize(h5file, start, stop, remove_old_node=False):
        """
        Copy rows ``start:stop`` of the "Data" group into a new group
        named ``Data_<start>_<stop>``.

        Parameters
        ----------
        h5file : object
            Open PyTables file containing a "Data" group.
        start, stop : int or None
            Row range to copy. None means 0 for `start` and the number
            of rows of ``Data.X`` for `stop`.
        remove_old_node : bool
            If True, an existing group with the target name is replaced,
            and afterwards the original "Data" group is removed and the
            new group is renamed to "Data". If False and the target
            group already exists, it is returned as is.

        Returns
        -------
        h5file, gcolumns
            The file handle and the group holding the copied rows.

        Raises
        ------
        ValueError
            If `h5file` is None.
        """
        if h5file is None:
            raise ValueError("h5file should not be None.")

        data = h5file.root.Data
        node_name = "Data_%s_%s" % (start, stop)

        if remove_old_node:
            try:
                gcolumns = h5file.createGroup('/', node_name,
                                              "Data %s" % node_name)
            except tables.exceptions.NodeError:
                h5file.removeNode('/', node_name, 1)
                gcolumns = h5file.createGroup('/', node_name,
                                              "Data %s" % node_name)
        elif node_name in h5file.root:
            return h5file, getattr(h5file.root, node_name)
        else:
            gcolumns = h5file.createGroup('/', node_name,
                                          "Data %s" % node_name)

        if FaceBBoxDDMPytables.h5file is None:
            FaceBBoxDDMPytables.h5file = h5file

        start = 0 if start is None else start
        # The new group is still empty at this point, so the default
        # upper bound has to come from the source data.
        stop = data.X.nrows if stop is None else stop

        atom = (tables.Float32Atom() if config.floatX == 'float32'
                else tables.Float64Atom())
        filters = FaceBBoxDDMPytables.filters

        x = h5file.createCArray(gcolumns, 'X', atom=atom,
                                shape=((stop - start, data.X.shape[1])),
                                title="Images", filters=filters)

        y = h5file.createTable(gcolumns, 'bboxes', BoundingBox,
                               title="Face bounding boxes",
                               filters=filters)

        x[:] = data.X[start:stop]
        bboxes = get_image_bboxes(slice(start, stop), data.bboxes)
        y.append(bboxes)

        if remove_old_node:
            h5file.removeNode('/', "Data", 1)
            h5file.renameNode('/', "Data", node_name)

        h5file.flush()
        return h5file, gcolumns
def analyze(config):
    """
    Evaluate a pickled convnet on an EEG test set and print the results.

    The model is loaded from a hard-coded debug path, the test set is
    built from a YAML description filled in from ``config.eeg``, and the
    model's argmax predictions are compared with the targets twice:

    * per frame: classification report, confusion matrix and
      misclassification rate;
    * per sequence (as delimited by ``testset.sequence_partitions``):
      the majority-vote prediction of each sequence is compared with the
      label of its first frame.

    Parameters
    ----------
    config : object
        Configuration object; ``config.get('output_path')`` is read and
        ``config.eeg`` must provide ``get`` for 'dataset_root',
        'dataset_suffix' and 'save_path'.
    """
    output_path = config.get('output_path')

    # NOTE(review): the model path is hard-coded to a debug run;
    # `output_path` is currently not used to locate the model.
    model_file = '../../../debug/debug_run4/debug_network.pkl'
    with log_timing(log, 'loading convnet model from {}'.format(model_file)):
        model = serial.load(model_file)

    input_shape = model.get_input_space().shape

    config = config.eeg
    hyper_params = {
        # frame length is taken from the model's input space
        'input_length': input_shape[0],
        'hop_size': 5,  # reduce amount of data by factor 5
        'dataset_root': config.get('dataset_root'),
        'dataset_suffix': config.get('dataset_suffix'),
        'save_path': config.get('save_path'),
    }

    dataset_yaml = '''
!obj:deepthought.datasets.rwanda2013rhythms.EEGDataset.EEGDataset {
    name : 'testset',
    path : %(dataset_root)s,
    suffix : '_channels', # %(dataset_suffix)s,
    subjects : [0],
    resample : [400, 100],
    start_sample : 2500,
    stop_sample : 3200, # None (empty) = end of sequence
    # FIXME:
    # n_fft : 24,
    # frame_size : 10, # %(input_length)i,
    frame_size : %(input_length)i,
    hop_size : %(hop_size)i,
    label_mode : 'rhythm_type',
    # save_matrix_path: '../../../debug/debug.pkl'
}
'''
    dataset_yaml = dataset_yaml % hyper_params
    print(dataset_yaml)

    with log_timing(log, 'parsing yaml'):
        testset = yaml_parse.load(dataset_yaml)

    seq_starts = testset.sequence_partitions

    # Compile a function mapping a minibatch to predicted class indices.
    minibatch = model.get_input_space().make_theano_batch()
    output_fn = theano.function(
        inputs=[minibatch],
        outputs=T.argmax(model.fprop(minibatch), axis=1))
    print("function compiled")

    data_specs = (CompositeSpace((model.get_input_space(),
                                  model.get_output_space())),
                  ("features", "targets"))
    it = testset.iterator('sequential', batch_size=100,
                          data_specs=data_specs)
    print("iterator ready")

    y_pred = []
    y_real = []
    for minibatch, target in it:
        y_pred.append(output_fn(minibatch))
        y_real.append(np.argmax(target, axis=1))
    y_pred = np.hstack(y_pred)
    y_real = np.hstack(y_real)

    # Frame-level results.
    print(y_pred[0:1000])
    print(classification_report(y_real, y_pred))
    print(confusion_matrix(y_real, y_pred))
    misclass = (y_real != y_pred)
    print(misclass.mean())

    # Sequence-level results.
    correct = 0
    s_real = []
    s_pred = []
    s_pred_agg = []

    n_channels = 16
    # Builtin int: the np.int alias no longer exists in current NumPy.
    channel_scores = np.zeros(n_channels, dtype=int)
    for i in xrange(len(seq_starts)):
        start = seq_starts[i]
        if i < len(seq_starts) - 1:
            stop = seq_starts[i + 1]
        else:
            stop = None  # last sequence runs to the end of the data

        s_real.append(y_real[start])

        # majority vote over the frames of the sequence
        s_pred.append(np.argmax(np.bincount(y_pred[start:stop])))
        # works only for binary classification
        s_pred_agg.append(np.mean(y_pred[start:stop]))

        seq_misclass = misclass[start:stop].mean()
        if seq_misclass < 0.5:  # more correct than incorrect
            correct += 1
            channel_scores[i % n_channels] += 1

    s_real = np.hstack(s_real)
    s_pred = np.hstack(s_pred)
    print(s_real)
    print(s_pred)
    print(s_pred_agg)

    print('aggregated')
    print(classification_report(s_real, s_pred))
    print(confusion_matrix(s_real, s_pred))
    s_misclass = (s_real != s_pred)
    print(s_misclass.mean())

    print(channel_scores)
def make_viewer(mat, grid_shape=None, patch_shape=None, activation=None, pad=None, is_color = False, rescale = True):
    """
    Given filters in rows, guesses dimensions of patches
    and nice dimensions for the PatchViewer and returns a PatchViewer
    containing visualizations of the filters.

    Parameters
    ----------
    mat : ndarray
        Values should lie in [-1, 1] if `rescale` is False.
        0. always indicates medium gray, with negative values drawn as
        blacker and positive values drawn as whiter.
        A matrix with each row being a different image patch, OR
        a 4D tensor in ('b', 0, 1, 'c') format.
        If matrix, we assume it was flattened using the same procedure as a
        ('b', 0, 1, 'c') DefaultViewConverter uses.
    grid_shape : tuple, optional
        A tuple of two ints specifying the shape of the grid in the
        PatchViewer, in (rows, cols) format. If not specified, this
        function does its best to choose an aesthetically pleasing
        value.
    patch_shape : tuple, optional
        A tuple of two ints specifying the shape of the patch.
        If `mat` is 4D, this function gets the patch shape from the shape of
        `mat`. If `mat` is 2D and patch_shape is not specified, this function
        assumes the patches are perfectly square.
    activation : iterable
        An iterable collection describing some kind of activation value
        associated with each patch. This is indicated with a border around the
        patch whose color intensity increases with activation value.
        The individual activation values may be single floats to draw one
        border or iterable collections of floats to draw multiple borders with
        differing intensities around the patch.
    pad : int, optional
        The amount of padding to add between patches in the displayed image.
    is_color : bool
        If True, assume the images are in color.
        Not needed if `mat` is in ('b', 0, 1, 'c') format since we can just
        look at its shape[-1].
    rescale : bool
        If True, rescale each patch so that its highest magnitude pixel
        reaches a value of either 0 or 1 depending on the sign of that pixel.

    Returns
    -------
    patch_viewer : PatchViewer
        A PatchViewer containing the patches stored in `mat`.
    """

    num_channels = 1
    if is_color:
        num_channels = 3

    if grid_shape is None:
        grid_shape = PatchViewer.pick_shape(mat.shape[0])

    if mat.ndim > 2:
        # Already topological: shape and channel count come from `mat`.
        patch_shape = mat.shape[1:3]
        topo_view = mat
        num_channels = mat.shape[3]
        is_color = num_channels > 1
    else:
        if patch_shape is None:
            assert mat.shape[1] % num_channels == 0
            # Floor division keeps the pixel count an integer even when
            # `/` means true division.
            patch_shape = PatchViewer.pick_shape(mat.shape[1] // num_channels,
                                                 exact = True)
            assert mat.shape[1] == (patch_shape[0] *
                                    patch_shape[1] *
                                    num_channels)
        topo_shape = (patch_shape[0], patch_shape[1], num_channels)
        view_converter = DefaultViewConverter(topo_shape)
        topo_view = view_converter.design_mat_to_topo_view(mat)

    rval = PatchViewer(grid_shape, patch_shape, pad=pad, is_color = is_color)

    for i in xrange(mat.shape[0]):
        if activation is not None:
            if hasattr(activation[0], '__iter__'):
                # Several activation collections: one border per collection.
                act = [a[i] for a in activation]
            else:
                act = activation[i]
        else:
            act = None

        patch = topo_view[i, :]

        rval.add_patch(patch, rescale=rescale,
                       activation=act)

    return rval
def _execute(self):
    """
    Extract triangle-code features for one dataset split, pool them over
    superpixel rectangles and save the result.

    The dataset is selected with ``self.dataset_family[which_set][size]``.
    The first item of its preprocessing pipeline is replaced by a dense
    (stride 1) grid patch extractor, every patch is encoded with
    ``triangle_code`` against ``self.model.mu``, the per-patch codes are
    reassembled into one feature map per example, and the map is pooled
    into ``num_superpixels x num_superpixels`` regions. Output feature
    ``j`` then pools channel ``idxs[j]`` over the superpixel rectangle
    ``top[j]..bottom[j]`` by ``left[j]..right[j]`` (inclusive). The
    pooling operation is ``self.pool_mode`` ('mean' or 'max') at both
    stages. The result is written with ``np.save`` to ``self.save_path``.

    Notes
    -----
    * ``self.batch_size`` must be 1 (asserted inside the batch loop).
    * ``num_superpixels`` is read from module scope.
    * NaN feature values are replaced by 0 and counted; a warning with
      the total is issued at the end.
    """
    global num_superpixels
    num_output_features = self.num_output_features
    idxs = self.idxs
    top = self.top
    bottom = self.bottom
    left = self.left
    right = self.right

    save_path = self.save_path
    batch_size = self.batch_size
    dataset_family = self.dataset_family
    which_set = self.which_set
    model = self.model
    size = self.size

    nan = 0

    dataset_descriptor = dataset_family[which_set][size]

    dataset = dataset_descriptor.dataset_maker()
    expected_num_examples = dataset_descriptor.num_examples

    full_X = dataset.get_design_matrix()
    num_examples = full_X.shape[0]
    assert num_examples == expected_num_examples

    if self.restrict is not None:
        assert self.restrict[1] <= full_X.shape[0]

        print('restricting to examples ', self.restrict[0], ' through ',
              self.restrict[1], ' exclusive')
        full_X = full_X[self.restrict[0]:self.restrict[1], :]

        assert self.restrict[1] > self.restrict[0]

    # update for after restriction
    num_examples = full_X.shape[0]

    assert num_examples > 0

    # The design matrix is fed back batch by batch below, so drop the
    # reference held by the dataset object before it gets copied.
    dataset.X = None
    dataset.design_loc = None
    dataset.compress = False

    patchifier = ExtractGridPatches(patch_shape=(size, size),
                                    patch_stride=(1, 1))

    pipeline = serial.load(dataset_descriptor.pipeline_path)

    assert isinstance(pipeline.items[0], ExtractPatches)
    pipeline.items[0] = patchifier

    print('defining features')
    V = T.matrix('V')

    mu = model.mu

    feat = triangle_code(V, mu)

    assert feat.dtype == 'float32'
    print('compiling theano function')
    f = function([V], feat)

    nhid = model.mu.get_value().shape[0]

    if config.device.startswith('gpu') and nhid >= 4000:
        f = halver(f, model.nhid)

    topo_feat_var = T.TensorType(
        broadcastable=(False, False, False, False), dtype='float32')()
    if self.pool_mode == 'mean':
        region_features = function([topo_feat_var],
                                   topo_feat_var.mean(axis=(1, 2)))
    elif self.pool_mode == 'max':
        region_features = function([topo_feat_var],
                                   topo_feat_var.max(axis=(1, 2)))
    else:
        assert False

    def average_pool(stride):
        # Despite the name, the pooling operation is whatever
        # `region_features` was compiled to above (mean or max).
        # Reads `topo_feat` and `ns` from the enclosing scope; both are
        # bound before the first call inside the batch loop below.
        def point(p):
            # Floor division: the result is used as a slice bound and
            # must stay an integer.
            return p * ns // stride

        rval = np.zeros((topo_feat.shape[0], stride, stride,
                         topo_feat.shape[3]), dtype='float32')

        for i in xrange(stride):
            for j in xrange(stride):
                rval[:, i, j, :] = region_features(
                    topo_feat[:, point(i):point(i + 1),
                              point(j):point(j + 1), :])

        return rval

    output = np.zeros((num_examples, num_output_features),
                      dtype='float32')

    fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'),
                           view_converter=DefaultViewConverter(
                               [1, 1, nhid]))

    # Patch positions per image side.
    # NOTE(review): the constant 32 presumably is the input image size.
    ns = 32 - size + 1
    depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                         patch_shape=(1, 1))

    # Diagnostic output when not even one full batch is available.
    if len(range(0, num_examples - batch_size + 1, batch_size)) <= 0:
        print(num_examples)
        print(batch_size)

    for i in xrange(0, num_examples - batch_size + 1, batch_size):
        print(i)

        t1 = time.time()

        d = copy.copy(dataset)
        d.set_design_matrix(full_X[i:i + batch_size, :])

        t2 = time.time()

        # applying preprocessor
        d.apply_preprocessor(pipeline, can_fit=False)
        X2 = d.get_design_matrix()

        t3 = time.time()

        # running theano function
        feat = f(X2)

        t4 = time.time()

        assert feat.dtype == 'float32'

        feat_dataset = copy.copy(fd)

        if contains_nan(feat):
            nan += np.isnan(feat).sum()
            feat[np.isnan(feat)] = 0

        feat_dataset.set_design_matrix(feat)

        # reassembling features
        feat_dataset.apply_preprocessor(depatchifier)

        # making topological view
        topo_feat = feat_dataset.get_topological_view()
        assert topo_feat.shape[0] == batch_size

        t5 = time.time()

        # pooling into superpixels
        superpixels = average_pool(num_superpixels)

        # The rectangle pooling below reduces over the batch axis too,
        # which is only meaningful for a single example.
        assert batch_size == 1

        if self.pool_mode == 'mean':
            for j in xrange(num_output_features):
                output[i:i + batch_size, j] = superpixels[
                    :, top[j]:bottom[j] + 1,
                    left[j]:right[j] + 1, idxs[j]].mean()
        elif self.pool_mode == 'max':
            for j in xrange(num_output_features):
                output[i:i + batch_size, j] = superpixels[
                    :, top[j]:bottom[j] + 1,
                    left[j]:right[j] + 1, idxs[j]].max()
        else:
            assert False

        assert output[i:i + batch_size, :].max() < 1e20

        t6 = time.time()

        # (total, set batch, preprocess, encode, reassemble, pool)
        print((t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5))

    if self.chunk_size is not None:
        # Chunked runs get a letter suffix derived from the chunk id.
        assert save_path.endswith('.npy')
        save_path_pieces = save_path.split('.npy')
        assert len(save_path_pieces) == 2
        assert save_path_pieces[1] == ''
        save_path = save_path_pieces[0] + '_' + chr(
            ord('A') + self.chunk_id) + '.npy'
    np.save(save_path, output)

    if nan > 0:
        warnings.warn(str(nan) + ' features were nan')
def _execute(self):
    """Extract pooled features for one dataset split and save them as .npy.

    Reads its configuration from ``self``: ``save_path``, ``batch_size``,
    ``dataset_family``, ``which_set``, ``size``, ``restrict``,
    ``chunk_size`` and ``chunk_id``.  It also relies on module-level names
    that must be set before the call: ``num_superpixels``,
    ``num_output_features``, ``idxs``, ``top``, ``bottom``, ``left``,
    ``right``, ``num_filters`` and the ``M`` / ``s`` pair used to evaluate
    ``sparse_codes`` (presumably a MATLAB bridge and session handle --
    TODO confirm).

    For each batch the method:

    1. runs the stored preprocessing pipeline (with its first item replaced
       by a stride-1 grid patch extractor),
    2. obtains codes ``Z`` from ``sparse_codes(batch, dictionary, lambda)``,
    3. splits them into positive and negative parts with a Theano function,
    4. reassembles the per-patch features into a topological view,
    5. average-pools that view into ``num_superpixels`` x
       ``num_superpixels`` regions, and
    6. writes, for every output feature ``j``, the mean over the rectangle
       ``top[j]..bottom[j]`` x ``left[j]..right[j]`` of channel ``idxs[j]``.

    The result is written with ``np.save``.  When ``self.chunk_size`` is
    not None, ``'_' + chr(ord('A') + self.chunk_id)`` is inserted before
    the ``.npy`` suffix.  A warning reports how many NaN features were
    replaced by 0.

    Raises
    ------
    AssertionError
        If any of the sanity checks fails (example count, restriction
        bounds, pipeline layout, dtypes, ``batch_size != 1``, pooled shape,
        or save path format).
    """
    # These are only read here; the declarations are kept from the
    # original to make the dependency on module state explicit.
    global num_superpixels
    global num_output_features
    global idxs
    global top
    global bottom
    global left
    global right

    save_path = self.save_path
    batch_size = self.batch_size
    dataset_family = self.dataset_family
    which_set = self.which_set
    size = self.size

    nan_count = 0

    dataset_descriptor = dataset_family[which_set][size]

    dataset = dataset_descriptor.dataset_maker()
    expected_num_examples = dataset_descriptor.num_examples

    full_X = dataset.get_design_matrix()
    num_examples = full_X.shape[0]
    assert num_examples == expected_num_examples

    if self.restrict is not None:
        assert self.restrict[1] <= full_X.shape[0]

        # Joined with single spaces so the output matches what the
        # Python 2 statement ``print a, b, c`` produced, while remaining
        # valid syntax on Python 3.
        print(' '.join(['restricting to examples ', str(self.restrict[0]),
                        ' through ', str(self.restrict[1]),
                        ' exclusive']))
        full_X = full_X[self.restrict[0]:self.restrict[1], :]

        assert self.restrict[1] > self.restrict[0]

    # update for after restriction
    num_examples = full_X.shape[0]

    assert num_examples > 0

    # Drop the design matrix from the template dataset; each batch gets a
    # shallow copy of it with its own slice of full_X installed.
    dataset.X = None
    dataset.design_loc = None
    dataset.compress = False

    patchifier = ExtractGridPatches(patch_shape=(size, size),
                                    patch_stride=(1, 1))

    pipeline = serial.load(dataset_descriptor.pipeline_path)

    # Swap the stored patch extractor for the exhaustive grid extractor.
    assert isinstance(pipeline.items[0], ExtractPatches)
    pipeline.items[0] = patchifier

    # Symbolic graph: concatenate the positive and negative parts of Z.
    Z_var = T.matrix('Z')

    pos = T.clip(Z_var, 0., 1e30)
    neg = T.clip(-Z_var, 0., 1e30)

    feat_var = T.concatenate((pos, neg), axis=1)
    assert feat_var.dtype == 'float32'

    print('compiling theano function')
    f = function([Z_var], feat_var)

    nhid = 3200  # 2 * num dictionary elems

    if config.device.startswith('gpu') and nhid >= 4000:
        f = halver(f, nhid)

    topo_feat_var = T.TensorType(
        broadcastable=(False, False, False, False), dtype='float32')()
    region_features = function([topo_feat_var],
                               topo_feat_var.mean(axis=(1, 2)))

    def average_pool(stride):
        # Closure over ``topo_feat`` and ``ns``: both are bound further
        # down in the enclosing scope before this function is first called.
        def point(p):
            # Floor division keeps the slice bounds integral on Python 3
            # and is identical to the former ``/`` on Python 2 ints.
            return p * ns // stride

        rval = np.zeros((topo_feat.shape[0], stride, stride,
                         topo_feat.shape[3]), dtype='float32')

        for i in xrange(stride):
            for j in xrange(stride):
                rval[:, i, j, :] = region_features(
                    topo_feat[:, point(i):point(i + 1),
                              point(j):point(j + 1), :])

        return rval

    output = np.zeros((num_examples, num_output_features), dtype='float32')

    # Template dataset used to turn flat per-patch features back into a
    # topological view.
    fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'),
                           view_converter=DefaultViewConverter([1, 1, nhid]))

    # Patch positions per side for stride-1 patches; the constant 32 is
    # presumably the input image side length -- TODO confirm.
    ns = 32 - size + 1
    depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                         patch_shape=(1, 1))

    # Diagnostic output when the batch loop below would not run at all.
    if len(range(0, num_examples - batch_size + 1, batch_size)) <= 0:
        print(num_examples)
        print(batch_size)

    for i in xrange(0, num_examples - batch_size + 1, batch_size):
        print(i)

        t1 = time.time()

        d = copy.copy(dataset)
        d.set_design_matrix(full_X[i:i + batch_size, :])

        t2 = time.time()

        # print '\tapplying preprocessor'
        d.apply_preprocessor(pipeline, can_fit=False)
        X2 = d.get_design_matrix()

        t3 = time.time()

        # print '\trunning theano function'
        M.put(s, 'batch', X2)
        M.eval(s, 'Z = sparse_codes(batch, dictionary, lambda)')
        codes = M.get(s, 'Z')
        feat = f(np.cast['float32'](codes))

        t4 = time.time()

        assert feat.dtype == 'float32'

        feat_dataset = copy.copy(fd)

        # Count NaNs and zero them out, computing the mask only once.
        nan_mask = np.isnan(feat)
        if np.any(nan_mask):
            nan_count += nan_mask.sum()
            feat[nan_mask] = 0

        feat_dataset.set_design_matrix(feat)

        # print '\treassembling features'
        feat_dataset.apply_preprocessor(depatchifier)

        # print '\tmaking topological view'
        topo_feat = feat_dataset.get_topological_view()
        assert topo_feat.shape[0] == batch_size

        t5 = time.time()

        # average pooling
        superpixels = average_pool(num_superpixels)

        # The per-feature ``.mean()`` below collapses the batch axis, so
        # only single-example batches are supported.
        assert batch_size == 1

        assert superpixels.shape[0] == batch_size
        assert superpixels.shape[1] == num_superpixels
        assert superpixels.shape[2] == num_superpixels
        assert superpixels.shape[3] == 2 * num_filters

        for j in xrange(num_output_features):
            output[i:i + batch_size, j] = superpixels[
                :, top[j]:bottom[j] + 1, left[j]:right[j] + 1,
                idxs[j]].mean()

        t6 = time.time()

        # Timings: total, then each stage in order.
        print((t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5))

    if self.chunk_size is not None:
        # Insert a per-chunk letter suffix before the extension.
        assert save_path.endswith('.npy')
        save_path_pieces = save_path.split('.npy')
        assert len(save_path_pieces) == 2
        assert save_path_pieces[1] == ''
        save_path = (save_path_pieces[0] + '_' +
                     chr(ord('A') + self.chunk_id) + '.npy')
    np.save(save_path, output)

    if nan_count > 0:
        warnings.warn(str(nan_count) + ' features were nan')
def __init__(self, patient_id, which_set, preprocessor_path, data_dir,
             leave_one_out_seizure, sample_size_second, batch_size,
             default_seed=0):
    """
    The Epilepsiae dataset customized for leave-one-seizure-out cross
    validation.

    Parameters
    ----------
    patient_id : int
        Patient ID. Not used by this constructor: the list of recording
        files below is hard-coded.
    which_set : string
        Name used to specify which partition of the dataset to be loaded
        (e.g., 'train', 'valid', or 'test'). If not specified, all data
        will be loaded.
    preprocessor_path : string
        File path to store the scaler for pre-processing the EEG data.
    data_dir : string
        Directory that stores the source EEG data. It must contain
        'RECORDS-WITH-SEIZURES.txt'.
    leave_one_out_seizure : int
        Index of the withheld seizure.
    sample_size_second : int
        Number of seconds used to specify sample size.
    batch_size : int
        Size of the batch, used to remove a few samples to make the
        number of samples divisible by the batch size.
    default_seed : int, optional
        Seed for random.

    For preprocessing, see more in
        https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/datasets/preprocessing.py

    For customizing dataset, see more in
        https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/scripts/icml_2013_wrepl/emotions/emotions_dataset.py
    """
    # Load data
    files = [
        'rec_26402102/26402102_0003.mat',
        'rec_26402102/26402102_0007.mat',
        'rec_26402102/26402102_0008.mat',
        'rec_26402102/26402102_0017.mat',
    ]

    scalp_channels = np.asarray([
        u'FP1', u'FP2', u'F3', u'F4', u'C3', u'C4', u'P3', u'P4',
        u'O1', u'O2', u'F7', u'F8', u'T3', u'T4', u'T5', u'T6',
        u'FZ', u'CZ', u'PZ',
    ])

    # Get seizure information
    seizure_info = pd.read_table(
        os.path.join(data_dir, 'RECORDS-WITH-SEIZURES.txt'), sep='\t')
    # Rewrite the '.data' extension to '.mat', ignoring case. The dot is
    # escaped and regex=True is explicit so that it is never treated as a
    # wildcard and the result does not depend on the pandas version's
    # default for `regex`.
    seizure_info['filename'] = seizure_info['filename'].str.replace(
        r'\.data', '.mat', case=False, regex=True)

    self.data_dir = data_dir
    self.files = files
    self.seizure_info = seizure_info
    self.filter_channels = scalp_channels
    self.default_seed = default_seed
    self.leave_one_out_seizure = leave_one_out_seizure
    self.batch_size = batch_size

    # load_data reads the attributes assigned above, so it must be called
    # only after they are set.
    X, y, n_channels, sample_size = self.load_data(
        which_set, sample_size_second, batch_size, preprocessor_path)

    self.n_channels = n_channels
    self.sample_size = sample_size

    # Each example is exposed as a (1, sample_size, 1) topological view
    # with axes ordered batch, rows, columns, channels.
    view_converter = DefaultViewConverter((1, sample_size, 1))
    view_converter.set_axes(axes=['b', 0, 1, 'c'])

    DenseDesignMatrix.__init__(self, X=X, y=y,
                               view_converter=view_converter,
                               axes=['b', 0, 1, 'c'])
def __init__(self, patient_id, which_set, preprocessor_path, data_dir,
             leave_one_out_seizure, sample_size_second, batch_size,
             default_seed=0):
    """
    The Epilepsiae dataset customized for leave-one-seizure-out cross
    validation.

    Parameters
    ----------
    patient_id : int
        Patient ID. Not used by this constructor: the list of recording
        files below is hard-coded.
    which_set : string
        Name used to specify which partition of the dataset to be loaded
        (e.g., 'train', 'valid', or 'test'). If not specified, all data
        will be loaded.
    preprocessor_path : string
        File path to store the scaler for pre-processing the EEG data.
    data_dir : string
        Directory that stores the source EEG data. It must contain
        'RECORDS-WITH-SEIZURES.txt'.
    leave_one_out_seizure : int
        Index of the withheld seizure.
    sample_size_second : int
        Number of seconds used to specify sample size.
    batch_size : int
        Size of the batch, used to remove a few samples to make the
        number of samples divisible by the batch size.
    default_seed : int, optional
        Seed for random.

    For preprocessing, see more in
        https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/datasets/preprocessing.py

    For customizing dataset, see more in
        https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/scripts/icml_2013_wrepl/emotions/emotions_dataset.py
    """
    # Load data
    files = [
        'rec_26402102/26402102_0003.mat',
        'rec_26402102/26402102_0007.mat',
        'rec_26402102/26402102_0008.mat',
        'rec_26402102/26402102_0017.mat',
    ]

    scalp_channels = np.asarray([
        u'FP1', u'FP2', u'F3', u'F4', u'C3', u'C4', u'P3', u'P4',
        u'O1', u'O2', u'F7', u'F8', u'T3', u'T4', u'T5', u'T6',
        u'FZ', u'CZ', u'PZ',
    ])

    # Get seizure information
    records_path = os.path.join(data_dir, 'RECORDS-WITH-SEIZURES.txt')
    seizure_info = pd.read_table(records_path, sep='\t')
    # Rewrite the '.data' extension to '.mat', ignoring case. The pattern
    # is an explicit regular expression with an escaped dot: left
    # unescaped, '.' matches any character when pandas interprets the
    # pattern as a regex, and whether it does so by default differs
    # between pandas versions.
    seizure_info['filename'] = seizure_info['filename'].str.replace(
        r'\.data', '.mat', case=False, regex=True)

    self.data_dir = data_dir
    self.files = files
    self.seizure_info = seizure_info
    self.filter_channels = scalp_channels
    self.default_seed = default_seed
    self.leave_one_out_seizure = leave_one_out_seizure
    self.batch_size = batch_size

    # Attributes above are assigned first because load_data is a method
    # of this object and runs against that state.
    X, y, n_channels, sample_size = self.load_data(
        which_set, sample_size_second, batch_size, preprocessor_path)

    self.n_channels = n_channels
    self.sample_size = sample_size

    # The same axis order is given to both the view converter and the
    # base-class constructor.
    axes = ['b', 0, 1, 'c']
    view_converter = DefaultViewConverter((1, sample_size, 1))
    view_converter.set_axes(axes=axes)

    DenseDesignMatrix.__init__(self, X=X, y=y,
                               view_converter=view_converter,
                               axes=['b', 0, 1, 'c'])