def check_padding(axes):
    padding = 3
    ddata = DummyDataset()
    topo = ddata.get_topological_view()
    wf_cls = WindowAndFlip
    wf = wf_cls(window_shape=(5, 5), randomize=[ddata],
                pad_randomized=padding)
    wf.setup(None, None, None)
    new_topo = ddata.get_topological_view()
    assert_equal(topo.shape, new_topo.shape)
    saw_padding = dict([((direction, amount), False)
                        for direction, amount
                        in itertools.product(['l', 'b', 'r', 't'],
                                             xrange(padding))])
    iters = 0
    while not all(saw_padding.values()) and iters < 50:
        for image in new_topo.swapaxes(0, 3):
            for i in xrange(padding):
                if (image[:i] == 0).all():
                    saw_padding['t', i] = True
                if (image[-i:] == 0).all():
                    saw_padding['b', i] = True
                if (image[:, -i:] == 0).all():
                    saw_padding['r', i] = True
                if (image[:, :i] == 0).all():
                    saw_padding['l', i] = True
        wf.on_monitor(None, None, None)
        new_topo = ddata.get_topological_view()
        iters += 1
def test_mean_H_given_V(self):
    tol = 1e-6

    # P(h_1 | v) / P(h_2 | v) = a
    # => exp(-E(v, h_1)) / exp(-E(v, h_2)) = a
    # => exp(E(v, h_2) - E(v, h_1)) = a
    # => E(v, h_2) - E(v, h_1) = log(a)
    # also log P(h_1 | v) - log P(h_2 | v) = log(a)

    rng = N.random.RandomState([1, 2, 3])

    m = 5

    Vv = as_floatX(N.zeros((m, self.nv)) + rng.randn(self.nv))
    Hv = as_floatX(rng.randn(m, self.nh) > 0.)

    log_Pv = self.log_P_H_given_V_func(Hv, Vv)

    Ev = self.E_func(Vv, Hv)

    for i in xrange(m):
        for j in xrange(i + 1, m):
            log_a = log_Pv[i] - log_Pv[j]
            e = Ev[j] - Ev[i]
            assert abs(e - log_a) < tol
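# A tiny self-contained NumPy check (illustration only, not part of the test
# suite) of the identity the test above relies on: if P(h) is proportional to
# exp(-E(h)), then log P(h_1) - log P(h_2) = E(h_2) - E(h_1), since the
# log-partition function cancels in the difference.
import numpy as np

demo_rng = np.random.RandomState(0)
E = demo_rng.randn(6)                     # energies of 6 configurations
logP = -E - np.log(np.exp(-E).sum())      # normalized log-probabilities
assert np.allclose(logP[0] - logP[1], E[1] - E[0])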
def outer(self, Y, Y_hat):
    if self._requires_reshape:
        if self._requires_unmask:
            try:
                Y, Y_mask = Y
                Y_hat, Y_hat_mask = Y_hat
            except (TypeError, ValueError):
                log.warning("Lost the mask when wrapping cost. This "
                            "can happen if this function is called "
                            "from within another wrapped function. "
                            "Most likely this won't cause any problem")
                return cost(self, Y, Y_hat)
        input_shape = ([Y.shape[0] * Y.shape[1]] +
                       [Y.shape[i] for i in xrange(2, Y.ndim)])
        reshaped_Y = Y.reshape(input_shape)
        if isinstance(Y_hat, tuple):
            input_shape = ([[Y_hat[j].shape[0] * Y_hat[j].shape[1]] +
                            [Y_hat[j].shape[i]
                             for i in xrange(2, Y_hat[j].ndim)]
                            for j in xrange(len(Y_hat))])
            reshaped_Y_hat = []
            for i in xrange(len(Y_hat)):
                reshaped_Y_hat.append(Y_hat[i].reshape(input_shape[i]))
            reshaped_Y_hat = tuple(reshaped_Y_hat)
        else:
            reshaped_Y_hat = Y_hat.reshape(input_shape)
        # Here we need to take the indices of only the unmasked data
        if self._requires_unmask:
            return cost(self, reshaped_Y[Y_mask.flatten().nonzero()],
                        reshaped_Y_hat[Y_mask.flatten().nonzero()])
        return cost(self, reshaped_Y, reshaped_Y_hat)
    else:  # Not RNN-friendly, but not requiring reshape
        return cost(self, Y, Y_hat)
def stochastic_max_pool_bc01(bc01, pool_shape, pool_stride, image_shape,
                             rng=None):
    """
    .. todo::

        WRITEME properly

    Stochastic max pooling for training as defined in:

    Stochastic Pooling for Regularization of Deep Convolutional Neural
    Networks. Matthew D. Zeiler, Rob Fergus

    Parameters
    ----------
    bc01 : theano 4-tensor
        in format (batch size, channels, rows, cols),
        IMPORTANT: All values should be positive
    pool_shape : tuple
        shape of the pool region (rows, cols)
    pool_stride : tuple
        strides between pooling regions (row stride, col stride)
    image_shape : tuple
        avoid doing some of the arithmetic in theano
    rng : theano random stream
    """
    r, c = image_shape
    pr, pc = pool_shape
    rs, cs = pool_stride

    batch = bc01.shape[0]
    channel = bc01.shape[1]

    rng = make_theano_rng(rng, 2022, which_method='multinomial')

    # Compute index in pooled space of last needed pool
    # (needed = each input pixel must appear in at least one pool)
    def last_pool(im_shp, p_shp, p_strd):
        rval = int(numpy.ceil(float(im_shp - p_shp) / p_strd))
        assert p_strd * rval + p_shp >= im_shp
        assert p_strd * (rval - 1) + p_shp < im_shp
        return rval

    # Compute starting row of the last pool
    last_pool_r = last_pool(image_shape[0], pool_shape[0],
                            pool_stride[0]) * pool_stride[0]
    # Compute number of rows needed in image for all indexes to work out
    required_r = last_pool_r + pr

    last_pool_c = last_pool(image_shape[1], pool_shape[1],
                            pool_stride[1]) * pool_stride[1]
    required_c = last_pool_c + pc

    # final result shape
    res_r = int(numpy.floor(last_pool_r / rs)) + 1
    res_c = int(numpy.floor(last_pool_c / cs)) + 1

    for bc01v in get_debug_values(bc01):
        assert not contains_inf(bc01v)
        assert bc01v.shape[2] == image_shape[0]
        assert bc01v.shape[3] == image_shape[1]

    # padding
    padded = tensor.alloc(0.0, batch, channel, required_r, required_c)
    name = bc01.name
    if name is None:
        name = 'anon_bc01'
    bc01 = tensor.set_subtensor(padded[:, :, 0:r, 0:c], bc01)
    bc01.name = 'zero_padded_' + name

    # unraveling
    window = tensor.alloc(0.0, batch, channel, res_r, res_c, pr, pc)
    window.name = 'unravelled_windows_' + name

    for row_within_pool in xrange(pool_shape[0]):
        row_stop = last_pool_r + row_within_pool + 1
        for col_within_pool in xrange(pool_shape[1]):
            col_stop = last_pool_c + col_within_pool + 1
            win_cell = bc01[:, :, row_within_pool:row_stop:rs,
                            col_within_pool:col_stop:cs]
            window = tensor.set_subtensor(
                window[:, :, :, :, row_within_pool, col_within_pool],
                win_cell)

    # find the norm
    norm = window.sum(axis=[4, 5])
    norm = tensor.switch(tensor.eq(norm, 0.0), 1.0, norm)
    norm = window / norm.dimshuffle(0, 1, 2, 3, 'x', 'x')
    # get prob
    prob = rng.multinomial(pvals=norm.reshape(
        (batch * channel * res_r * res_c, pr * pc)), dtype='float32')
    # select
    res = (window * prob.reshape(
        (batch, channel, res_r, res_c, pr, pc))).max(axis=5).max(axis=4)
    res.name = 'pooled_' + name

    return tensor.cast(res, theano.config.floatX)
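# A minimal NumPy sketch (illustration only, not part of this module) of the
# stochastic pooling rule implemented above: within each pool region, one
# activation is kept, drawn with probability proportional to its (positive)
# value. Non-overlapping 2x2 pools over a single 4x4 feature map are assumed.
import numpy as np

demo_rng = np.random.RandomState(0)
fmap = demo_rng.rand(4, 4) + 0.1          # strictly positive activations
pooled = np.zeros((2, 2))
for i in range(2):
    for j in range(2):
        pool = fmap[2 * i:2 * i + 2, 2 * j:2 * j + 2].ravel()
        pvals = pool / pool.sum()         # multinomial over the pool
        pooled[i, j] = pool[demo_rng.choice(pool.size, p=pvals)]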
def tile_raster_images(X, img_shape, tile_shape=None, tile_spacing=(1, 1),
                       scale_rows_to_unit_interval=True,
                       output_pixel_vals=True,
                       min_dynamic_range=1e-4):
    """
    Transform an array with one flattened image per row, into an array in
    which images are reshaped and laid out like tiles on a floor.

    This function is useful for visualizing datasets whose rows are images,
    and also columns of matrices for transforming those rows (such as the
    first layer of a neural net).

    Parameters
    ----------
    X : numpy.ndarray or tuple of 4 channels or None
        A 2-D array in which every row is a flattened image.
    img_shape : tuple
        The original shape of each image
    tile_shape : tuple
        The number of images to tile (rows, cols). Defaults to a square-ish
        shape with the right area for the number of images.
    min_dynamic_range : float, positive
        Dynamic range of each image is used in scaling to the unit interval,
        but images with less dynamic range than this will be scaled as if
        this were the dynamic range.

    Returns
    -------
    out_array : 2D array with same dtype as X
        Array suitable for viewing as an image (See: `PIL.Image.fromarray`).
    """
    # This is premature when tile_slices_to_image is not documented at all
    # yet, but ultimately true:
    # print >> sys.stderr, "WARN: tile_raster_images sucks, use
    # tile_slices_to_image"
    if len(img_shape) == 3 and img_shape[2] == 3:
        # make this save an rgb image
        if scale_rows_to_unit_interval:
            logger.warning("tile_raster_images' scaling routine "
                           "messes up colour - try tile_slices_to_image")
        return tile_raster_images(
            (X[:, 0::3], X[:, 1::3], X[:, 2::3], None),
            img_shape=img_shape[:2],
            tile_shape=tile_shape,
            tile_spacing=tile_spacing,
            scale_rows_to_unit_interval=scale_rows_to_unit_interval,
            output_pixel_vals=output_pixel_vals,
            min_dynamic_range=min_dynamic_range)

    if isinstance(X, tuple):
        n_images_in_x = X[0].shape[0]
    else:
        n_images_in_x = X.shape[0]

    if tile_shape is None:
        tile_shape = most_square_shape(n_images_in_x)

    assert len(img_shape) == 2
    assert len(tile_shape) == 2
    assert len(tile_spacing) == 2

    # out_shape is the shape in pixels of the returned image array
    out_shape = [(ishp + tsp) * tshp - tsp
                 for ishp, tshp, tsp in zip(img_shape, tile_shape,
                                            tile_spacing)]

    if isinstance(X, tuple):
        if scale_rows_to_unit_interval:
            raise NotImplementedError()
        assert len(X) == 4
        if output_pixel_vals:
            out_array = numpy.zeros((out_shape[0], out_shape[1], 4),
                                    dtype='uint8')
        else:
            # Note: a tuple has no dtype of its own; use the first
            # channel's dtype.
            out_array = numpy.zeros((out_shape[0], out_shape[1], 4),
                                    dtype=X[0].dtype)

        # colors default to 0, alpha defaults to 1 (opaque)
        if output_pixel_vals:
            channel_defaults = [0, 0, 0, 255]
        else:
            channel_defaults = [0., 0., 0., 1.]

        for i in xrange(4):
            if X[i] is None:
                out_array[:, :, i] = numpy.zeros(
                    out_shape,
                    dtype='uint8' if output_pixel_vals
                    else out_array.dtype) + channel_defaults[i]
            else:
                out_array[:, :, i] = tile_raster_images(
                    X[i], img_shape, tile_shape, tile_spacing,
                    scale_rows_to_unit_interval, output_pixel_vals)
        return out_array

    else:
        H, W = img_shape
        Hs, Ws = tile_spacing

        out_scaling = 1
        if output_pixel_vals and str(X.dtype).startswith('float'):
            out_scaling = 255

        out_array = numpy.zeros(out_shape,
                                dtype='uint8' if output_pixel_vals
                                else X.dtype)
        for tile_row in xrange(tile_shape[0]):
            for tile_col in xrange(tile_shape[1]):
                if tile_row * tile_shape[1] + tile_col < X.shape[0]:
                    if scale_rows_to_unit_interval:
                        try:
                            this_img = scale_to_unit_interval(
                                X[tile_row * tile_shape[1]
                                  + tile_col].reshape(img_shape),
                                eps=min_dynamic_range)
                        except ValueError:
                            raise ValueError(
                                'Failed to reshape array of shape %s '
                                'to shape %s' %
                                (X[tile_row * tile_shape[1]
                                   + tile_col].shape, img_shape))
                    else:
                        this_img = X[tile_row * tile_shape[1]
                                     + tile_col].reshape(img_shape)
                    out_array[
                        tile_row * (H + Hs):tile_row * (H + Hs) + H,
                        tile_col * (W + Ws):tile_col * (W + Ws) + W] \
                        = this_img * out_scaling
        return out_array
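# A minimal usage sketch (assuming this module's helpers, most_square_shape
# and scale_to_unit_interval, are importable alongside the function above):
# 100 random 8x8 grayscale images become one tiled uint8 array of
# (8 + 1) * 10 - 1 = 89 pixels per side.
X_demo = numpy.random.RandomState(0).rand(100, 64)
tiles = tile_raster_images(X_demo, img_shape=(8, 8), tile_shape=(10, 10))
assert tiles.shape == (89, 89) and tiles.dtype == numpy.uint8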
def __init__(self, which_set, center=False, example_range=None):
    """
    .. todo::

        WRITEME
    """
    if which_set == 'train':
        train = load('${PYLEARN2_DATA_PATH}/stl10/stl10_matlab/train.mat')

        # Load the class names
        self.class_names = [array[0].encode('utf-8')
                            for array in train['class_names'][0]]

        # Load the fold indices
        fold_indices = train['fold_indices']
        assert fold_indices.shape == (1, 10)
        self.fold_indices = np.zeros((10, 1000), dtype='uint16')
        for i in xrange(10):
            indices = fold_indices[0, i]
            assert indices.shape == (1000, 1)
            assert indices.dtype == 'uint16'
            self.fold_indices[i, :] = indices[:, 0]

        # The data is stored as uint8
        # If we leave it as uint8, it will cause the CAE to silently fail
        # since theano will treat derivatives wrt X as 0
        X = np.cast['float32'](train['X'])

        assert X.shape == (5000, 96 * 96 * 3)

        if example_range is not None:
            X = X[example_range[0]:example_range[1], :]

        # this is uint8
        y = train['y'][:, 0]
        assert y.shape == (5000, )
    elif which_set == 'test':
        test = load('${PYLEARN2_DATA_PATH}/stl10/stl10_matlab/test.mat')

        # Load the class names
        self.class_names = [array[0].encode('utf-8')
                            for array in test['class_names'][0]]

        # The data is stored as uint8
        # If we leave it as uint8, it will cause the CAE to silently fail
        # since theano will treat derivatives wrt X as 0
        X = np.cast['float32'](test['X'])
        assert X.shape == (8000, 96 * 96 * 3)

        if example_range is not None:
            X = X[example_range[0]:example_range[1], :]

        # this is uint8
        y = test['y'][:, 0]
        assert y.shape == (8000, )
    elif which_set == 'unlabeled':
        unlabeled = load('${PYLEARN2_DATA_PATH}/stl10/stl10_matlab/'
                         'unlabeled.mat')

        X = unlabeled['X']

        # this file is stored in HDF format, which transposes everything
        assert X.shape == (96 * 96 * 3, 100000)
        assert X.dtype == 'uint8'

        if example_range is None:
            X = X.value
        else:
            X = X.value[:, example_range[0]:example_range[1]]
        X = np.cast['float32'](X.T)

        unlabeled.close()

        y = None
    else:
        raise ValueError('"' + which_set + '" is not an STL10 dataset. '
                         'Recognized values are "train", "test", and '
                         '"unlabeled".')

    if center:
        X -= 127.5

    view_converter = dense_design_matrix.DefaultViewConverter((96, 96, 3))

    super(STL10, self).__init__(X=X, y=y, y_labels=10,
                                view_converter=view_converter)

    for i in xrange(self.X.shape[0]):
        mat = X[i:i + 1, :]
        topo = self.get_topological_view(mat)
        for j in xrange(topo.shape[3]):
            temp = topo[0, :, :, j].T.copy()
            topo[0, :, :, j] = temp
        mat = self.get_design_matrix(topo)
        X[i:i + 1, :] = mat

    assert not contains_nan(self.X)
def __init__(self, datasets, which_set, sequence=1, dropout=True,
             normalise=[], labels=[], shuffle=False, start=None, stop=None):
    self.args = locals()

    assert which_set in ['train', 'valid']
    assert len(normalise) == len(datasets)

    vector_spaces = tuple()
    data = tuple()
    self.mean = list()
    self.std = list()
    z = list()
    for dataset, n in zip(datasets, normalise):
        # compare the file extension, not a single character
        if dataset[-3:] == 'npz':
            mode_data = N.load(dataset)['arr_0']
        else:
            mode_data = N.load(dataset)
        z.append((mode_data == 0).all(axis=1))
        r, c = mode_data.shape
        if start is not None:
            assert stop is not None
            assert start >= 0
            assert stop > start
            if stop > mode_data.shape[0]:
                raise ValueError('stop=' + str(stop) + '>' +
                                 'm=' + str(mode_data.shape[0]))
            mode_data = mode_data[start:stop, :]
            if mode_data.shape[0] != stop - start:
                raise ValueError("data.shape[0]: %d. start: %d stop: %d"
                                 % (mode_data.shape[0], start, stop))
        cuts = (mode_data == 0).all(axis=1).nonzero()
        mean = 1
        std = 1
        if n == 1:
            mean = mode_data.mean()
            std = mode_data.std()
        elif n == 2:
            mean = mode_data.mean(axis=0)
            std = mode_data.std(axis=0)
        mode_data = (mode_data - mean) / std
        self.mean.append(mean)
        self.std.append(std)

        if sequence != 1:
            temp = mode_data
            mode_data = np.zeros([temp.shape[0], sequence * temp.shape[1]])
            for i in range(0, temp.shape[0] - sequence):
                mode_data[i, :] = temp[i:i + sequence, :].reshape(
                    sequence * temp.shape[1])
            del temp

        vector_spaces = vector_spaces + (VectorSpace(mode_data.shape[1]), )
        data = data + (mode_data, )

    # # remove changes between datasets if stacked
    # b = N.logical_and(z[0], z[1])
    # for a in z[2:]:
    #     b = N.logical_and(b, a)
    # for ii in range(b.shape[0]):
    #     if b[ii]:
    #         for i in range(ii - sequence + 1, ii):
    #             b[i] = True
    # ind = N.logical_not(b).nonzero()
    # for ii in range(len(data)):
    #     data[ii] = data[ii][ind, :]
    #
    # assert data[0].shape[0] == data[1].shape[0]
    # assert data[2].shape[0] == data[1].shape[0]
    # assert data[2].shape[0] == data[0].shape[0]

    ground_truth = data[0].copy()
    for ii in data[1:]:
        ground_truth = N.concatenate((ground_truth, ii), axis=1)

    # Modal Dropout - drop only 1 mode at a time
    if dropout:
        data = list(data)
        seq = N.ones((1, 3))
        for ii in range(len(datasets)):
            seq2 = seq.copy()
            seq2[:, ii] = N.zeros((seq.shape[0]))
            seq = N.concatenate((seq, seq2), axis=0)
        seq = seq[1:-1, :]
        cases = N.where(seq.sum(axis=1) != len(datasets) - 1)
        seq = N.delete(seq, cases, axis=0)
        all_data = list(data)
        all_data.append(ground_truth)
        for ii in all_data:
            assert not N.isnan(N.sum(ii))
        for i, ii in enumerate(seq):
            for j, jj in enumerate(ii):
                if jj:
                    all_data[j] = N.concatenate((all_data[j], data[j]),
                                                axis=0)
                else:
                    all_data[j] = N.concatenate(
                        (all_data[j], N.zeros(data[j].shape)), axis=0)
            all_data[-1] = N.concatenate((all_data[-1], ground_truth),
                                         axis=0)
        data = tuple(all_data)
        del all_data
    else:
        data = list(data)
        data.append(ground_truth)
        data = tuple(data)

    vector_spaces = vector_spaces + (VectorSpace(data[-1].shape[1]), )

    if shuffle:
        self.shuffle_rng = make_np_rng(None, [1, 2, 3],
                                       which_method="shuffle")
        for ii in xrange(data[0].shape[0]):
            jj = self.shuffle_rng.randint(data[0].shape[0])
            # Copy ensures that memory is not aliased.
            for d in data:
                tmp = d[ii, :].copy()
                d[ii, :] = d[jj, :]
                d[jj, :] = tmp

    if len(labels) == 0:
        for ii in range(len(datasets)):
            labels.append('dataset_%i' % ii)
        labels.append('targets')

    data_specs = (CompositeSpace(vector_spaces), tuple(labels))
    super(CustomMMLoaderDropout, self).__init__(data=data,
                                                data_specs=data_specs)
def __init__(self, models, datasets, normalise, which_set, batch_size,
             shuffle=False, start=None, stop=None, length=None,
             axes=['b', 0, 1, 'c']):
    self.args = locals()

    existing_data_path = os.environ['MMDAEdata'] + splitext(
        basename(models[0]))[0] + '_' + splitext(basename(
            datasets[0]))[0] + '_' + splitext(
                basename(models[1]))[0] + '_' + splitext(
                    basename(datasets[1]))[0] + '_' + which_set + '.npy'

    assert which_set in ['train', 'valid']
    assert type(models) is list
    assert type(datasets) is list
    assert len(models) == len(datasets)
    assert len(models) == len(normalise)

    def dimshuffle(b01c):
        default = ('b', 0, 1, 'c')
        return b01c.transpose(*[default.index(axis) for axis in axes])

    # only process if it hasn't been done already
    if not os.path.exists(existing_data_path):
        for ii in range(len(models)):
            # Load single mode model for first layer
            model = serial.load(models[ii])

            # Load the single mode data
            data = N.load(datasets[ii])
            if normalise[ii] == 1:
                data_mean = data.mean()
                data_std = data.std()
            elif normalise[ii] == 2:
                data_mean = data.mean(axis=0)
                data_std = data.std(axis=0)
            data = (data - data_mean) / data_std

            sequence = model.dataset_yaml_src.split('sequence: ')
            if len(sequence) > 1:
                sequence = int(sequence[1].split(',')[0])
            else:
                sequence = 1
            if 'sequence_old' in locals():
                assert sequence == sequence_old
            sequence_old = sequence

            if sequence != 1:
                temp = data
                data = np.zeros([temp.shape[0], sequence * temp.shape[1]])
                for i in range(0, temp.shape[0] - sequence):
                    data[i, :] = temp[i:i + sequence, :].reshape(
                        1, sequence * temp.shape[1])
                del temp

            if start is not None:
                assert stop is not None
                assert start >= 0
                assert stop > start
                assert ((stop - start) % batch_size) == 0
                # report the shape of the local data array, not self.X,
                # which does not exist yet at this point
                if stop > data.shape[0]:
                    raise ValueError('stop=' + str(stop) + '>' +
                                     'm=' + str(data.shape[0]))
                data = data[start:stop, :]
                if data.shape[0] != stop - start:
                    raise ValueError("data.shape[0]: %d. start: %d stop: %d"
                                     % (data.shape[0], start, stop))

            # Process data to get hidden representation from first layer
            data = theano.shared(data)
            data = model.mf(data)
            data = data[0]
            data = data[0]
            data = data.eval()
            if 'topo_view' not in locals():
                topo_view = data.reshape(data.shape[0], 1, data.shape[1])
            else:
                topo_view = N.append(topo_view,
                                     data.reshape(data.shape[0], 1,
                                                  data.shape[1]),
                                     axis=2)

        m, r, c = topo_view.shape
        topo_view = topo_view.reshape(m, r, c, 1)

        # save the data to avoid reprocessing later
        N.save(existing_data_path, topo_view)
    else:
        topo_view = N.load(existing_data_path)

    m = topo_view.shape[0]

    if shuffle:
        self.shuffle_rng = make_np_rng(None, [1, 2, 3],
                                       which_method="shuffle")
        for i in xrange(topo_view.shape[0]):
            j = self.shuffle_rng.randint(m)
            # Copy ensures that memory is not aliased.
            tmp = topo_view[i, :, :, :].copy()
            topo_view[i, :, :, :] = topo_view[j, :, :, :]
            topo_view[j, :, :, :] = tmp

    super(CustomMMPosterior, self).__init__(topo_view=dimshuffle(topo_view))

    assert not N.any(N.isnan(self.X))
def check_sample_correctishness_c01b(f):
    batch_size = 5
    rows = 32
    cols = 30
    channels = 3
    pool_rows = 2
    pool_cols = 3

    rng = np.random.RandomState([2012, 9, 26])
    zv = rng.randn(channels, rows, cols,
                   batch_size).astype(config.floatX) * 2. - 3.
    top_down_v = rng.randn(channels, rows // pool_rows, cols // pool_cols,
                           batch_size).astype(config.floatX)

    z_th = T.TensorType(broadcastable=(False, False, False, False),
                        dtype=config.floatX)()
    z_th.name = 'z_th'
    z_th.tag.test_value = zv

    top_down_th = T.TensorType(broadcastable=(False, False, False, False),
                               dtype=config.floatX)()
    top_down_th.name = 'top_down_th'
    top_down_th.tag.test_value = top_down_v

    theano_rng = MRG_RandomStreams(rng.randint(2147462579))
    p_th, h_th, p_sth, h_sth = f(z_th, (pool_rows, pool_cols), top_down_th,
                                 theano_rng)

    prob_func = function([z_th, top_down_th], [p_th, h_th])
    pv, hv = prob_func(zv, top_down_v)

    sample_func = function([z_th, top_down_th], [p_sth, h_sth])

    acc_p = 0. * pv
    acc_h = 0. * hv

    # make sure the test gets good coverage, ie, that it includes
    # many different activation probs for both detector and pooling layer
    buckets = 10
    bucket_width = 1. / float(buckets)
    for i in xrange(buckets):
        lower_lim = i * bucket_width
        upper_lim = (i + 1) * bucket_width

        assert np.any((pv >= lower_lim) * (pv < upper_lim))
        assert np.any((hv >= lower_lim) * (hv < upper_lim))

    assert upper_lim == 1.

    for i in xrange(10000):
        ps, hs = sample_func(zv, top_down_v)

        assert ps.shape == pv.shape
        assert hs.shape == hv.shape

        acc_p += ps
        acc_h += hs

    est_p = acc_p / float(i + 1)
    est_h = acc_h / float(i + 1)

    pd = np.abs(est_p - pv)
    hd = np.abs(est_h - hv)

    # don't really know how tight this should be
    # but you can try to pose an equivalent problem
    # and implement it in another way
    # using a numpy implementation in softmax_acc.py
    # I got a max error of .17
    assert max(pd.max(), hd.max()) < .17

    # Do exhaustive checks on just the last sample
    assert np.all((ps == 0) + (ps == 1))
    assert np.all((hs == 0) + (hs == 1))

    for k in xrange(batch_size):
        for i in xrange(ps.shape[1]):
            for j in xrange(ps.shape[2]):
                for l in xrange(channels):
                    p = ps[l, i, j, k]
                    h = hs[l, i * pool_rows:(i + 1) * pool_rows,
                           j * pool_cols:(j + 1) * pool_cols, k]
                    assert h.shape == (pool_rows, pool_cols)
                    assert p == h.max()

    """
    If you made it to here, it's correctish
    """
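# A rough standalone NumPy check (illustration only) of how loose the .17
# tolerance above is relative to pure Monte Carlo noise: the mean of 10000
# Bernoulli(p) draws has standard error sqrt(p * (1 - p) / 10000) <= 0.005,
# so even the max deviation over many units stays far below .17.
import numpy as np

demo_rng = np.random.RandomState(0)
p = 0.5                                    # worst case for the standard error
draws = demo_rng.rand(10000, 1000) < p     # 1000 independent estimates
max_dev = np.abs(draws.mean(axis=0) - p).max()
assert max_dev < 5 * np.sqrt(p * (1 - p) / 10000)   # ~5 sigma bound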
def __init__(self, which_set, center=False, custom_path=None):
    assert which_set in ['train', 'test', 'unlabeled', 'custom']

    path = "${PYLEARN2_DATA_PATH}/TLChallenge"

    if which_set == 'train':
        path += '/training/training-data.dat'
    elif which_set == 'test':
        path += '/test/test-data.dat'
    elif which_set == 'unlabeled':
        path += '/unlabelled_tiny.dat'
    elif which_set == 'custom':
        path = custom_path

    remote_path = preprocess(path)
    path = cache.datasetCache.cache_file(remote_path)

    X = N.fromfile(path, dtype=N.uint8, sep=' ')
    X = X.reshape(X.shape[0] // (32 * 32 * 3), 32 * 32 * 3, order='F')

    assert X.max() == 255
    assert X.min() == 0

    X = N.cast['float32'](X)
    y = None

    if center:
        X -= 127.5

    view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3))

    X = view_converter.design_mat_to_topo_view(X)
    X = N.transpose(X, (0, 2, 1, 3))
    X = view_converter.topo_view_to_design_mat(X)

    # y is always None at this point, so pass y_labels=None rather than
    # N.max(y) + 1, which would raise on None
    super(TL_Challenge, self).__init__(X=X, y=y, y_labels=None,
                                       view_converter=view_converter)

    assert not N.any(N.isnan(self.X))

    if which_set == 'train' or which_set == 'test':
        labels_path = remote_path[:-8] + 'labels.dat'
        labels_path = cache.datasetCache.cache_file(labels_path)
        self.y_fine = N.fromfile(labels_path, dtype=N.uint8, sep=' ')
        assert len(self.y_fine.shape) == 1
        assert self.y_fine.shape[0] == X.shape[0]

        # 0 : aquatic_mammals
        # 1 : fish
        # 2 : flowers
        FOOD_CONTAINER = 3
        FRUIT = 4
        # 5 : household_electrical_devices
        FURNITURE = 6
        INSECTS = 7
        # 8 : large_carnivores
        # 9 : large_man-made_outdoor_things
        # 10 : large_natural_outdoor_scenes
        LARGE_OMNIVORES_HERBIVORES = 11
        MEDIUM_MAMMAL = 12
        # 13 : non-insect_invertebrates
        # 14 : people
        # 15 : reptiles
        # 16 : small_mammals
        # 17 : trees
        # 18 : vehicles_1
        # 19 : vehicles_2

        self.y_coarse = self.y_fine.copy()
        self.y_coarse[self.y_coarse == 100] = INSECTS
        self.y_coarse[self.y_coarse == 101] = LARGE_OMNIVORES_HERBIVORES
        self.y_coarse[self.y_coarse == 102] = LARGE_OMNIVORES_HERBIVORES
        self.y_coarse[self.y_coarse == 103] = LARGE_OMNIVORES_HERBIVORES
        self.y_coarse[self.y_coarse == 104] = FRUIT
        self.y_coarse[self.y_coarse == 105] = FOOD_CONTAINER
        self.y_coarse[self.y_coarse == 106] = FRUIT
        self.y_coarse[self.y_coarse == 107] = MEDIUM_MAMMAL
        self.y_coarse[self.y_coarse == 108] = FRUIT
        self.y_coarse[self.y_coarse == 109] = FURNITURE

        assert self.y_coarse.min() == 3
        assert self.y_coarse.max() == 12

        for i in xrange(120):
            if self.y_coarse[i] == FRUIT:
                assert self.y_fine[i] in [104, 106, 108]
def test_softmax_mf_energy_consistent():
    # A test of the Softmax class
    # Verifies that the mean field update is consistent with
    # the energy function

    # Since a Softmax layer contains only one random variable
    # (with n_classes possible values) the mean field assumption
    # does not impose any restriction so mf_update simply gives
    # the true expected value of h given v.
    # We also know P(h | v)
    #  = P(h, v) / P(v)
    #  = P(h, v) / sum_h P(h, v)
    #  = exp(-E(h, v)) / sum_h exp(-E(h, v))
    # So we can check that computing P(h | v) with both
    # methods works the same way

    rng = np.random.RandomState([2012, 11, 1, 1131])

    # Make DBM
    num_vis = rng.randint(1, 11)
    n_classes = rng.randint(1, 11)

    v = BinaryVector(num_vis)
    v.set_biases(rng.uniform(-1., 1., (num_vis, )).astype(config.floatX))

    y = Softmax(n_classes=n_classes, layer_name='y', irange=1.)
    y.set_biases(rng.uniform(-1., 1., (n_classes, )).astype(config.floatX))

    dbm = DBM(visible_layer=v, hidden_layers=[y], batch_size=1, niter=50)

    # Randomly pick a v to condition on
    # (Random numbers are generated via dbm.rng)
    layer_to_state = dbm.make_layer_to_state(1)
    v_state = layer_to_state[v]
    y_state = layer_to_state[y]

    # Infer P(y | v) using mean field
    expected_y = y.mf_update(state_below=v.upward_state(v_state))

    expected_y = expected_y[0, :]

    expected_y = expected_y.eval()

    # Infer P(y | v) using the energy function
    energy = dbm.energy(V=v_state, hidden=[y_state])
    unnormalized_prob = T.exp(-energy)
    assert unnormalized_prob.ndim == 1
    unnormalized_prob = unnormalized_prob[0]
    unnormalized_prob = function([], unnormalized_prob)

    def compute_unnormalized_prob(which):
        write_y = np.zeros((n_classes, ))
        write_y[which] = 1.

        y_value = y_state.get_value()
        y_value[0, :] = write_y
        y_state.set_value(y_value)

        return unnormalized_prob()

    probs = [compute_unnormalized_prob(idx) for idx in xrange(n_classes)]
    denom = sum(probs)
    probs = [on_prob / denom for on_prob in probs]

    # np.asarray(probs) doesn't make a numpy vector, so I do it manually
    wtf_numpy = np.zeros((n_classes, ))
    for i in xrange(n_classes):
        wtf_numpy[i] = probs[i]
    probs = wtf_numpy

    if not np.allclose(expected_y, probs):
        print('mean field expectation of h:', expected_y)
        print('expectation of h based on enumerating energy function '
              'values:', probs)
        assert False
def check_sample_correctishness_b01c(f):
    batch_size = 5
    rows = 32
    cols = 30
    channels = 3
    pool_rows = 2
    pool_cols = 3

    rng = np.random.RandomState([2012, 9, 26])
    zv = rng.randn(batch_size, rows, cols,
                   channels).astype(config.floatX) * 2. - 3.
    top_down_v = rng.randn(batch_size, rows // pool_rows,
                           cols // pool_cols,
                           channels).astype(config.floatX)

    z_th = T.TensorType(broadcastable=(False, False, False, False),
                        dtype=config.floatX)()
    z_th.name = 'z_th'

    top_down_th = T.TensorType(broadcastable=(False, False, False, False),
                               dtype=config.floatX)()
    top_down_th.name = 'top_down_th'

    theano_rng = MRG_RandomStreams(rng.randint(2147462579))
    p_th, h_th, p_sth, h_sth = f(z_th, (pool_rows, pool_cols), top_down_th,
                                 theano_rng)

    prob_func = function([z_th, top_down_th], [p_th, h_th])
    pv, hv = prob_func(zv, top_down_v)

    sample_func = function([z_th, top_down_th], [p_sth, h_sth])

    acc_p = 0. * pv
    acc_h = 0. * hv

    # make sure the test gets good coverage, ie, that it includes many
    # different activation probs for both detector and pooling layer
    buckets = 10
    bucket_width = 1. / float(buckets)
    for i in xrange(buckets):
        lower_lim = i * bucket_width
        upper_lim = (i + 1) * bucket_width

        assert np.any((pv >= lower_lim) * (pv < upper_lim))
        assert np.any((hv >= lower_lim) * (hv < upper_lim))

    assert upper_lim == 1.

    for i in xrange(10000):
        ps, hs = sample_func(zv, top_down_v)

        assert ps.shape == pv.shape
        assert hs.shape == hv.shape

        acc_p += ps
        acc_h += hs

    est_p = acc_p / float(i + 1)
    est_h = acc_h / float(i + 1)

    pd = np.abs(est_p - pv)
    hd = np.abs(est_h - hv)

    """
    # plot maps of the estimation error, this is to see if it has
    # some spatial pattern this is useful for detecting bugs like
    # not handling the border correctly, etc.
    from pylearn2.gui.patch_viewer import PatchViewer

    pv = PatchViewer((pd.shape[0], pd.shape[3]),
                     (pd.shape[1], pd.shape[2]), is_color=False)
    for i in xrange(pd.shape[0]):
        for j in xrange(pd.shape[3]):
            pv.add_patch((pd[i, :, :, j] / pd.max()) * 2.0 - 1.0,
                         rescale=False)
    pv.show()

    pv = PatchViewer((hd.shape[0], hd.shape[3]),
                     (hd.shape[1], hd.shape[2]), is_color=False)
    for i in xrange(hd.shape[0]):
        for j in xrange(hd.shape[3]):
            pv.add_patch((hd[i, :, :, j] / hd.max()) * 2.0 - 1.0,
                         rescale=False)
    pv.show()
    """

    """
    plot expectation to estimate versus error in estimation
    expect bigger errors for values closer to 0.5

    from matplotlib import pyplot as plt

    #nelem = reduce(lambda x, y: x * y, pd.shape)
    #plt.scatter(pv.reshape(nelem), pd.reshape(nelem))
    #plt.show()

    nelem = reduce(lambda x, y: x * y, hd.shape)
    plt.scatter(hv.reshape(nelem), hd.reshape(nelem))
    plt.show()
    """

    # don't really know how tight this should be
    # but you can try to pose an equivalent problem
    # and implement it in another way
    # using a numpy implementation in softmax_acc.py
    # I got a max error of .17
    assert max(pd.max(), hd.max()) < .17

    # Do exhaustive checks on just the last sample
    assert np.all((ps == 0) + (ps == 1))
    assert np.all((hs == 0) + (hs == 1))

    for k in xrange(batch_size):
        for i in xrange(ps.shape[1]):
            for j in xrange(ps.shape[2]):
                for l in xrange(channels):
                    p = ps[k, i, j, l]
                    h = hs[k, i * pool_rows:(i + 1) * pool_rows,
                           j * pool_cols:(j + 1) * pool_cols, l]
                    assert h.shape == (pool_rows, pool_cols)
                    assert p == h.max()

    """
    If you made it to here, it's correctish
    """
def do_test(pool_size_1):
    # Make DBM and read out its pieces
    dbm = make_random_basic_binary_dbm(
        rng=rng,
        pool_size_1=pool_size_1,
        pool_size_2=1,  # centering is only updated for pool size 1
        center=True)

    v = dbm.visible_layer
    h1, h2 = dbm.hidden_layers

    num_p = h1.get_output_space().dim

    # Choose which unit we will test
    p_idx = rng.randint(num_p)

    # Randomly pick a v, h1[-p_idx], and h2 to condition on
    # (Random numbers are generated via dbm.rng)
    layer_to_state = dbm.make_layer_to_state(1)
    v_state = layer_to_state[v]
    h1_state = layer_to_state[h1]
    h2_state = layer_to_state[h2]

    # Debugging checks
    num_h = h1.detector_layer_dim
    assert num_p * pool_size_1 == num_h
    pv, hv = h1_state
    assert pv.get_value().shape == (1, num_p)
    assert hv.get_value().shape == (1, num_h)

    # Infer P(h1[i] | h2, v) using mean field
    expected_p, expected_h = h1.mf_update(
        state_below=v.upward_state(v_state),
        state_above=h2.downward_state(h2_state),
        layer_above=h2)

    expected_p = expected_p[0, p_idx]
    expected_h = expected_h[0, p_idx * pool_size_1:(p_idx + 1) * pool_size_1]

    expected_p, expected_h = function([], [expected_p, expected_h])()

    # Infer P(h1[i] | h2, v) using the energy function
    energy = dbm.energy(V=v_state, hidden=[h1_state, h2_state])
    unnormalized_prob = T.exp(-energy)
    assert unnormalized_prob.ndim == 1
    unnormalized_prob = unnormalized_prob[0]
    unnormalized_prob = function([], unnormalized_prob)

    p_state, h_state = h1_state

    def compute_unnormalized_prob(which_detector):
        write_h = np.zeros((pool_size_1, ))
        if which_detector is None:
            write_p = 0.
        else:
            write_p = 1.
            write_h[which_detector] = 1.

        h_value = h_state.get_value()
        p_value = p_state.get_value()

        h_value[0, p_idx * pool_size_1:(p_idx + 1) * pool_size_1] = write_h
        p_value[0, p_idx] = write_p

        h_state.set_value(h_value)
        p_state.set_value(p_value)

        return unnormalized_prob()

    off_prob = compute_unnormalized_prob(None)
    on_probs = [compute_unnormalized_prob(idx)
                for idx in xrange(pool_size_1)]
    denom = off_prob + sum(on_probs)
    off_prob /= denom
    on_probs = [on_prob / denom for on_prob in on_probs]
    assert np.allclose(1., off_prob + sum(on_probs))

    # np.asarray(on_probs) doesn't make a numpy vector, so I do it manually
    wtf_numpy = np.zeros((pool_size_1, ))
    for i in xrange(pool_size_1):
        wtf_numpy[i] = on_probs[i]
    on_probs = wtf_numpy

    # Check that they match
    if not np.allclose(expected_p, 1. - off_prob):
        print('mean field expectation of p:', expected_p)
        print('expectation of p based on enumerating energy function '
              'values:', 1. - off_prob)
        print('pool_size_1:', pool_size_1)
        assert False
    if not np.allclose(expected_h, on_probs):
        print('mean field expectation of h:', expected_h)
        print('expectation of h based on enumerating energy function '
              'values:', on_probs)
        assert False
def check_bvmp_samples(value, num_samples, n, pool_size, mean, tol):
    """
    bvmp = BinaryVectorMaxPool

    Parameters
    ----------
    value : tuple
        (pooled batch, detector batch), all made with the same params
    num_samples : int
        number of samples there should be in the batch
    n : int
        detector layer dimension
    pool_size : int
        size of each pool region
    mean : tuple
        (expected value of pool unit, expected value of detector units)
    tol : float
        amount the empirical mean is allowed to deviate from the
        analytical expectation

    Checks that:
        1) all values are binary
        2) detector layer units are mutually exclusive
        3) pooled unit is max of the detector units
        4) correct number of samples is present
        5) variables are of the right shapes
        6) samples converge to the right expected value
    """
    pv, hv = value
    assert n % pool_size == 0
    num_pools = n // pool_size

    assert pv.ndim == 2
    assert pv.shape[0] == num_samples
    assert pv.shape[1] == num_pools

    assert hv.ndim == 2
    assert hv.shape[0] == num_samples
    assert hv.shape[1] == n

    assert is_binary(pv)
    assert is_binary(hv)

    for i in xrange(num_pools):
        sub_p = pv[:, i]
        assert sub_p.shape == (num_samples, )
        sub_h = hv[:, i * pool_size:(i + 1) * pool_size]
        assert sub_h.shape == (num_samples, pool_size)
        if not np.all(sub_p == sub_h.max(axis=1)):
            for j in xrange(num_samples):
                print(sub_p[j], sub_h[j, :])
                assert sub_p[j] == sub_h[j, :].max()
            assert False
        assert np.max(sub_h.sum(axis=1)) == 1

    p, h = mean
    assert p.ndim == 1
    assert h.ndim == 1
    emp_p = pv.mean(axis=0)
    emp_h = hv.mean(axis=0)

    max_diff = np.abs(p - emp_p).max()
    if max_diff > tol:
        print('expected value of pooling units: ', p)
        print('empirical expectation: ', emp_p)
        print('maximum difference: ', max_diff)
        raise ValueError("Pooling unit samples have an unlikely mean.")
    max_diff = np.abs(h - emp_h).max()
    if max_diff > tol:
        assert False
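# A minimal usage sketch (illustration only; assumes this module's np and
# is_binary are in scope). Exact samples are drawn from pools of size 4 in
# which each pool turns on with probability q and, when on, activates a
# single uniformly chosen detector unit, so the pooled mean is q and each
# detector mean is q / pool_size.
q = 0.6
num_samples, n, pool_size = 10000, 8, 4
num_pools = n // pool_size
demo_rng = np.random.RandomState(0)
hv = np.zeros((num_samples, n))
for s in range(num_samples):
    for pool in range(num_pools):
        if demo_rng.rand() < q:
            on = demo_rng.randint(pool_size)
            hv[s, pool * pool_size + on] = 1.
pv = hv.reshape(num_samples, num_pools, pool_size).max(axis=2)
mean = (np.zeros(num_pools) + q, np.zeros(n) + q / pool_size)
check_bvmp_samples((pv, hv), num_samples, n, pool_size, mean, tol=0.03)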
def split_train_valid(path, num_valid_train=400, num_valid_extra=200):
    """
    Extract a number of class-balanced samples from the train and extra
    sets for validation, and regard the remaining samples as the new
    train set.

    Parameters
    ----------
    num_valid_train : int, optional
        Number of samples per class from train
    num_valid_extra : int, optional
        Number of samples per class from extra
    """
    # load difficult train
    data = load("{0}train_32x32.mat".format(path))
    valid_index = []
    for i in xrange(1, 11):
        index = numpy.nonzero(data['y'] == i)[0]
        index.flags.writeable = 1
        rng.shuffle(index)
        valid_index.append(index[:num_valid_train])
    valid_index = set(numpy.concatenate(valid_index))
    train_index = set(numpy.arange(data['X'].shape[3])) - valid_index
    valid_index = list(valid_index)
    train_index = list(train_index)

    train_x = data['X'][:, :, :, train_index]
    train_y = data['y'][train_index, :]
    valid_x = data['X'][:, :, :, valid_index]
    valid_y = data['y'][valid_index, :]

    train_size = data['X'].shape[3]
    assert train_x.shape[3] == train_size - num_valid_train * 10
    assert train_y.shape[0] == train_size - num_valid_train * 10
    assert valid_x.shape[3] == num_valid_train * 10
    assert valid_y.shape[0] == num_valid_train * 10

    del data
    gc.collect()

    # load extra train
    data = load("{0}extra_32x32.mat".format(path))
    valid_index = []
    for i in xrange(1, 11):
        index = numpy.nonzero(data['y'] == i)[0]
        index.flags.writeable = 1
        rng.shuffle(index)
        valid_index.append(index[:num_valid_extra])
    valid_index = set(numpy.concatenate(valid_index))
    train_index = set(numpy.arange(data['X'].shape[3])) - valid_index
    valid_index = list(valid_index)
    train_index = list(train_index)

    train_x = numpy.concatenate((train_x, data['X'][:, :, :, train_index]),
                                axis=3)
    train_y = numpy.concatenate((train_y, data['y'][train_index, :]))
    valid_x = numpy.concatenate((valid_x, data['X'][:, :, :, valid_index]),
                                axis=3)
    valid_y = numpy.concatenate((valid_y, data['y'][valid_index, :]))

    extra_size = data['X'].shape[3]
    sizes['valid'] = (num_valid_train + num_valid_extra) * 10
    sizes['splitted_train'] = train_size + extra_size - sizes['valid']
    assert train_x.shape[3] == sizes['splitted_train']
    assert train_y.shape[0] == sizes['splitted_train']
    assert valid_x.shape[3] == sizes['valid']
    assert valid_y.shape[0] == sizes['valid']

    del data
    gc.collect()

    train_x = numpy.cast[config.floatX](train_x)
    valid_x = numpy.cast[config.floatX](valid_x)

    return design_matrix_view(train_x), train_y,\
        design_matrix_view(valid_x), valid_y
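# A minimal NumPy sketch (illustration only) of the class-balanced selection
# used above: for each label, shuffle that label's indices and reserve the
# first k of them for validation.
import numpy

demo_rng = numpy.random.RandomState(0)
y_demo = numpy.repeat(numpy.arange(1, 11), 20).reshape(-1, 1)  # 20 per class
k = 5
valid_idx = []
for label in range(1, 11):
    idx = numpy.nonzero(y_demo == label)[0]
    demo_rng.shuffle(idx)
    valid_idx.append(idx[:k])
valid_idx = numpy.concatenate(valid_idx)
assert valid_idx.size == 10 * k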
def estimate_likelihood(W_list, b_list, trainset, testset,
                        free_energy_fn=None, batch_size=100,
                        large_ais=False, log_z=None, pos_mf_steps=50,
                        pos_sample_steps=0):
    """
    Compute estimates of the log-partition function and the likelihood of
    trainset and testset.

    Parameters
    ----------
    W_list : array-like object of theano shared variables
        Weights of the DBM
    b_list : array-like object of theano shared variables
        Biases of the DBM
    trainset : pylearn2.datasets.dataset.Dataset
        Training set
    testset : pylearn2.datasets.dataset.Dataset
        Test set
    free_energy_fn : theano.function
        Function which, given temperature beta_k, computes the free energy
        of the samples stored in model.samples. This function should return
        a symbolic vector.
    batch_size : integer
        Size of a batch of examples
    large_ais : boolean
        If True, will use 3e5 chains, instead of 3e4
    log_z : scalar, optional
        Log-partition function (if precomputed)
    pos_mf_steps : int
        The number of fixed-point iterations for approximate inference
    pos_sample_steps : int
        Same thing as pos_mf_steps. When both pos_mf_steps > 0 and
        pos_sample_steps > 0, pos_mf_steps has priority.

    Returns
    -------
    nll : scalar
        Negative log-likelihood of data.X under `model`.
    logz : scalar
        Estimate of log-partition function of `model`.
    """
    warnings.warn("This is guaranteed to work only for DBMs with a " +
                  "BinaryVector visible layer and BinaryVectorMaxPool " +
                  "hidden layers with pool sizes of 1.")

    # Add a dummy placeholder for visible layer's weights in W_list
    W_list = [None] + W_list

    # Depth of the DBM
    depth = len(b_list)

    # Initialize samples
    psamples = []
    nsamples = []
    for i, b in enumerate(b_list):
        psamples += [utils.sharedX(rng.rand(batch_size,
                                            b.get_value().shape[0]),
                                   name='psamples%i' % i)]
        nsamples += [utils.sharedX(rng.rand(batch_size,
                                            b.get_value().shape[0]),
                                   name='nsamples%i' % i)]
    psamples[0] = T.matrix('psamples0')

    ##########################
    ## BUILD THEANO FUNCTIONS
    ##########################
    beta = T.scalar()

    # For an even number of layers, we marginalize the odd layers
    # (and vice-versa)
    marginalize_odd = (depth % 2) == 0

    # Build function to retrieve energy.
    E = -T.dot(nsamples[0], b_list[0]) * beta
    for i in xrange(1, depth):
        E -= T.sum(T.dot(nsamples[i - 1], W_list[i] * beta) * nsamples[i],
                   axis=1)
        E -= T.dot(nsamples[i], b_list[i] * beta)
    energy_fn = theano.function([beta], E)

    # Build inference function.
    assert (pos_mf_steps or pos_sample_steps)
    pos_steps = pos_mf_steps if pos_mf_steps else pos_sample_steps
    new_psamples = _e_step(psamples, W_list, b_list, n_steps=pos_steps)
    ups = OrderedDict()
    for psample, new_psample in zip(psamples[1:], new_psamples[1:]):
        ups[psample] = new_psample
    temp = numpy.asarray(trainset.X, dtype=floatX)
    mean_train = numpy.mean(temp, axis=0)
    inference_fn = theano.function(inputs=[psamples[0]], outputs=[],
                                   updates=ups)

    # Configure baserate bias for (h0 if `marginalize_odd` else h1)
    inference_fn(numpy.tile(mean_train, (batch_size, 1)))
    numpy_psamples = [mean_train[None, :]] + \
                     [psample.get_value() for psample in psamples[1:]]
    mean_pos = numpy.minimum(numpy_psamples[not marginalize_odd], 1 - 1e-5)
    mean_pos = numpy.maximum(mean_pos, 1e-5)
    pa_bias = -numpy.log(1. / mean_pos[0] - 1.)

    # Build Theano function to sample from interpolating distributions.
    updates = OrderedDict()
    new_nsamples = neg_sampling(W_list, b_list, nsamples, beta=beta,
                                pa_bias=pa_bias,
                                marginalize_odd=marginalize_odd,
                                theano_rng=theano_rng)
    for (nsample, new_nsample) in zip(nsamples, new_nsamples):
        updates[nsample] = new_nsample
    sample_fn = theano.function([beta], [], updates=updates,
                                name='sample_func')

    # Build function to compute free-energy of p_k(h1).
    fe_bp_h1 = free_energy_at_beta(W_list, b_list, nsamples, beta,
                                   pa_bias, marginalize_odd=marginalize_odd)
    free_energy_fn = theano.function([beta], fe_bp_h1)

    ###########
    ## RUN AIS
    ###########

    # Generate exact sample for the base model.
    for i, nsample_i in enumerate(nsamples):
        bias = pa_bias if i == 1 else b_list[i].get_value()
        hi_mean_vec = 1. / (1. + numpy.exp(-bias))
        hi_mean = numpy.tile(hi_mean_vec, (batch_size, 1))
        r = rng.random_sample(hi_mean.shape)
        hi_sample = numpy.array(hi_mean > r, dtype=floatX)
        nsample_i.set_value(hi_sample)

    # Default configuration for interpolating distributions
    if large_ais:
        betas = numpy.cast[floatX](numpy.hstack(
            (numpy.linspace(0, 0.5, 1e5 + 1)[:-1],
             numpy.linspace(0.5, 0.9, 1e5 + 1)[:-1],
             numpy.linspace(0.9, 1.0, 1e5))))
    else:
        betas = numpy.cast[floatX](numpy.hstack(
            (numpy.linspace(0, 0.5, 1e4 + 1)[:-1],
             numpy.linspace(0.5, 0.9, 1e4 + 1)[:-1],
             numpy.linspace(0.9, 1.0, 1e4))))

    if log_z is None:
        log_ais_w = compute_log_ais_weights(batch_size, free_energy_fn,
                                            sample_fn, betas)
        dlogz, var_dlogz = estimate_from_weights(log_ais_w)
        log_za = compute_log_za(b_list, pa_bias, marginalize_odd)
        log_z = log_za + dlogz
        logging.info('log_z = %f' % log_z)
        logging.info('log_za = %f' % log_za)
        logging.info('dlogz = %f' % dlogz)
        logging.info('var_dlogz = %f' % var_dlogz)

    train_ll = compute_likelihood_given_logz(nsamples, psamples, batch_size,
                                             energy_fn, inference_fn,
                                             log_z, trainset.X)
    logging.info('Training likelihood = %f' % train_ll)
    test_ll = compute_likelihood_given_logz(nsamples, psamples, batch_size,
                                            energy_fn, inference_fn,
                                            log_z, testset.X)
    logging.info('Test likelihood = %f' % test_ll)

    return (train_ll, test_ll, log_z)
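# The interpolating temperature schedule used above, in a standalone form
# (illustration only, with 30 steps instead of 3e4): spacing tightens as
# beta approaches 1, where the intermediate distributions change fastest.
import numpy

betas_demo = numpy.hstack((numpy.linspace(0, 0.5, 11)[:-1],
                           numpy.linspace(0.5, 0.9, 11)[:-1],
                           numpy.linspace(0.9, 1.0, 10)))
assert betas_demo[0] == 0.0 and betas_demo[-1] == 1.0
assert numpy.all(numpy.diff(betas_demo) > 0)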
def check_sample_correctishness_channelwise(f):
    """
    Tests that the sample mean converges to the conditional expectation
    given by the function.
    Tests that p really is the max of the samples.
    Tests that at most one h in a group is on.
    """

    batch_size = 27
    pool_size = 4
    n = pool_size * 21

    rng = np.random.RandomState([2012, 9, 26])
    zv = rng.randn(batch_size, n).astype(config.floatX) * 3.5 - 5.
    top_down_v = rng.randn(batch_size,
                           n // pool_size).astype(config.floatX)

    z_th = T.matrix()
    z_th.tag.test_value = zv
    z_th.name = 'z_th'

    top_down_th = T.matrix()
    top_down_th.tag.test_value = top_down_v
    top_down_th.name = 'top_down_th'

    theano_rng = MRG_RandomStreams(rng.randint(2147462579))
    p_th, h_th, p_sth, h_sth = f(z_th, pool_size, top_down_th, theano_rng)

    prob_func = function([z_th, top_down_th], [p_th, h_th])
    pv, hv = prob_func(zv, top_down_v)

    sample_func = function([z_th, top_down_th], [p_sth, h_sth])

    acc_p = 0. * pv
    acc_h = 0. * hv

    # make sure the test gets good coverage, ie, that it includes
    # many different activation probs for both detector and pooling layer
    buckets = 10
    bucket_width = 1. / float(buckets)
    print(pv.min(), pv.max())
    print(hv.min(), hv.max())
    for i in xrange(buckets):
        lower_lim = i * bucket_width
        upper_lim = (i + 1) * bucket_width
        print(lower_lim, upper_lim)

        assert np.any((pv >= lower_lim) * (pv < upper_lim))
        assert np.any((hv >= lower_lim) * (hv < upper_lim))

    assert upper_lim == 1.

    for i in xrange(10000):
        ps, hs = sample_func(zv, top_down_v)

        assert ps.shape == pv.shape
        assert hs.shape == hv.shape

        acc_p += ps
        acc_h += hs

    est_p = acc_p / float(i + 1)
    est_h = acc_h / float(i + 1)

    pd = np.abs(est_p - pv)
    hd = np.abs(est_h - hv)

    """
    # plot maps of the estimation error, this is to see if it has some
    # spatial pattern this is useful for detecting bugs like not handling
    # the border correctly, etc.
    # from pylearn2.gui.patch_viewer import PatchViewer

    pv = PatchViewer((pd.shape[0], pd.shape[3]),
                     (pd.shape[1], pd.shape[2]), is_color=False)
    for i in xrange(pd.shape[0]):
        for j in xrange(pd.shape[3]):
            pv.add_patch((pd[i, :, :, j] / pd.max()) * 2.0 - 1.0,
                         rescale=False)
    pv.show()

    pv = PatchViewer((hd.shape[0], hd.shape[3]),
                     (hd.shape[1], hd.shape[2]), is_color=False)
    for i in xrange(hd.shape[0]):
        for j in xrange(hd.shape[3]):
            pv.add_patch((hd[i, :, :, j] / hd.max()) * 2.0 - 1.0,
                         rescale=False)
    pv.show()
    """

    """
    plot expectation to estimate versus error in estimation
    expect bigger errors for values closer to 0.5

    from matplotlib import pyplot as plt

    #nelem = reduce(lambda x, y: x * y, pd.shape)
    #plt.scatter(pv.reshape(nelem), pd.reshape(nelem))
    #plt.show()

    nelem = reduce(lambda x, y: x * y, hd.shape)
    plt.scatter(hv.reshape(nelem), hd.reshape(nelem))
    plt.show()
    """

    # don't really know how tight this should be
    # but you can try to pose an equivalent problem
    # and implement it in another way
    # using a numpy implementation in softmax_acc.py
    # I got a max error of .17
    assert max(pd.max(), hd.max()) < .17

    # Do exhaustive checks on just the last sample
    assert np.all((ps == 0) + (ps == 1))
    assert np.all((hs == 0) + (hs == 1))

    for k in xrange(batch_size):
        for i in xrange(ps.shape[1]):
            p = ps[k, i]
            h = hs[k, i * pool_size:(i + 1) * pool_size]
            assert h.shape == (pool_size, )
            assert p == h.max()
            assert h.sum() <= 1

    """
    If you made it to here, it's correctish
    """
print(W2.shape)

prod = np.dot(W1, W2)

pv = make_viewer(prod.T)
if out_prefix is None:
    pv.show()
else:
    pv.save(out_prefix + "_prod.png")

print('Sorting so largest-norm layer 2 weights are plotted at the top')
norms = np.square(W2).sum(axis=0)
idxs = [elem[1] for elem in sorted(zip(-norms, range(norms.shape[0])))]

new = W2.copy()
for i in xrange(len(idxs)):
    new[:, i] = W2[:, idxs[i]]
W2 = new

dataset_yaml_src = model.dataset_yaml_src
dataset = yaml_parse.load(dataset_yaml_src)

imgs = dataset.get_weights_view(W1.T)

N1 = W1.shape[1]
N = W2.shape[1]
N = min(N, 100)
assert dataset.X.shape[0] % batch_size == 0

X = model.get_input_space().make_batch_theano()
Y = model.fprop(X)

from theano import tensor as T

y = T.argmax(Y, axis=1)

from theano import function

f = function([X], y)

y = []

for i in xrange(int(dataset.X.shape[0] / batch_size)):
    x_arg = dataset.X[i * batch_size:(i + 1) * batch_size, :]
    if X.ndim > 2:
        x_arg = dataset.get_topological_view(x_arg)
    y.append(f(x_arg.astype(X.dtype)))

y = np.concatenate(y)
assert y.ndim == 1
assert y.shape[0] == dataset.X.shape[0]

# discard any zero-padding that was used to give the batches uniform size
y = y[:m]

out = open(out_path, 'w')
for i in xrange(y.shape[0]):
    out.write('%d\n' % y[i])
out.close()
def pooling_matrix(groups, per_group, strides=None, dtype=None, sparse=None):
    """
    Construct a pooling matrix, optionally with overlapping pools
    arranged in a 1 or 2D topology.

    Parameters
    ----------
    groups : int or tuple
        The grid dimensions of a 1- or 2-dimensional pooling grid.
    per_group : int or tuple
        The grid dimensions of a single 1- or 2-dimensional feature
        pool. Must be same length as `groups`.
    strides : int or tuple, optional
        The stride of the pools along each dimension. A value of `None`
        is equivalent to setting equal to `per_group`, i.e. no overlap.
    dtype : dtype object or str, optional
        The dtype of the resulting pooling matrix.
    sparse : str, optional
        If `None`, the function will return a dense matrix (a rank-2
        `numpy.ndarray`). Specifying 'csc' or 'csr' in this argument will
        cause the function to return a `scipy.sparse.csc_matrix` or a
        `scipy.sparse.csr_matrix`, instead.

    Returns
    -------
    pools : ndarray or sparse matrix
        Either a dense 2-dimensional NumPy array or one of
        `scipy.sparse.csc_matrix` or `scipy.sparse.csr_matrix`, depending
        on the value of the `sparse` argument. In any case, the shape is
        `(n_pools, n_filters)` and the value of `pools[i, j]` is 1 if
        feature `j` is in pool `i`, and 0 otherwise.
    """
    # Error-check arguments and fill in row_stride and col_stride
    # if either argument is absent.
    def _validate_shape(shape, param_name):
        try:
            shape = tuple(shape)
            [int(val) for val in shape]
        except (ValueError, TypeError):
            try:
                shape = (int(shape), )
            except TypeError:
                reraise_as(TypeError("%s must be int or int tuple"
                                     % param_name))
        return shape

    groups = _validate_shape(groups, 'groups')
    per_group = _validate_shape(per_group, 'per_group')
    if strides is not None:
        strides = _validate_shape(strides, 'strides')
    else:
        strides = per_group
    if len(groups) != len(per_group):
        raise ValueError('groups and per_group must have the same length')
    elif len(per_group) != len(strides):
        raise ValueError('per_group and strides must have the same length')
    if len(groups) > 2 or len(per_group) > 2:
        raise ValueError('only <= 2-dimensional pooling grids are '
                         'supported')
    if not all(stride <= dim for stride, dim in izip(strides, per_group)):
        raise ValueError('strides must each be <= per_group dimensions')
    try:
        group_rows, group_cols = groups
        rows_per_group, cols_per_group = per_group
        row_stride, col_stride = strides
    except ValueError:
        group_rows, group_cols = groups[0], 1
        rows_per_group, cols_per_group = per_group[0], 1
        row_stride, col_stride = strides[0], 1
    if sparse is not None and sparse not in ('csc', 'csr'):
        raise ValueError("sparse must be one of (None, 'csr', 'csc')")
    # The total number of filters along either dimension is the number of
    # groups times the stride, plus whatever dangles off the last filter
    # (the added term is zero if there are no overlapping pools).
    filter_rows = group_rows * row_stride + (rows_per_group - row_stride)
    filter_cols = group_cols * col_stride + (cols_per_group - col_stride)
    if dtype is None:
        dtype = theano.config.floatX
    # If the return type is dense we can treat it as a 4-tensor and
    # then reshape. If not, we'll need some index math.
    shape = (group_rows, group_cols, filter_rows, filter_cols)
    matrix_shape = group_rows * group_cols, filter_rows * filter_cols
    if sparse is not None:
        # Use a dictionary-of-keys matrix at construction time,
        # since they are efficient for arbitrary assignment.
        # TODO: I think CSC/CSR are fast to construct if you know the total
        # number of elements, which should be easy to calculate.
        pools = scipy.sparse.dok_matrix(matrix_shape, dtype=dtype)
    else:
        pools = np.zeros(shape, dtype=dtype)
    for g_row in xrange(group_rows):
        for g_col in xrange(group_cols):
            # The start and end points of the contiguous block of 1's.
            row_start = row_stride * g_row
            row_end = row_start + rows_per_group
            col_start = col_stride * g_col
            col_end = col_start + cols_per_group
            if sparse is not None:
                for f_row in xrange(row_start, row_end):
                    matrix_cols = slice(f_row * shape[3] + col_start,
                                        f_row * shape[3] + col_end)
                    # The group to which this belongs.
                    matrix_row = g_row * shape[1] + g_col
                    pools[matrix_row, matrix_cols] = 1.
            else:
                # If the matrix is a dense 4-tensor then we can get
                # away with doing an entire pool in one assignment.
                pools[g_row, g_col, row_start:row_end,
                      col_start:col_end] = 1
    if sparse is not None:
        # Call either .tocsr() or .tocsc()
        pools = getattr(pools, 'to' + sparse)()
    else:
        pools = pools.reshape(matrix_shape)
    return pools
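# A minimal usage sketch (illustration only; assumes this module's numpy and
# theano imports are available): four non-overlapping 1-D pools of two
# features each give a block-diagonal (4, 8) matrix, while a stride smaller
# than per_group produces overlapping pools.
P = pooling_matrix(groups=4, per_group=2)
assert P.shape == (4, 8)
assert np.array_equal(P[0], [1, 1, 0, 0, 0, 0, 0, 0])

P_overlap = pooling_matrix(groups=4, per_group=3, strides=2)
assert P_overlap.shape == (4, 9)  # 4 * 2 + (3 - 2) = 9 features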
import sys

from matplotlib import pyplot
from theano.compat.six.moves import xrange

from pylearn2.utils import serial

pyplot.hold(True)

model_paths = sys.argv[1:]

smoothing = 1
try:
    smoothing = int(model_paths[0])
    model_paths = model_paths[1:]
except Exception:
    pass

count = 0
style = '-'
for model_path in model_paths:
    model = serial.load(model_path)
    smoothed_reward_record = []
    count += 1
    if count > 7:
        style = '+'
    for i in xrange(smoothing - 1, len(model.reward_record)):
        smoothed_reward_record.append(
            sum(model.reward_record[i - smoothing + 1:i + 1]) /
            float(smoothing))
    pyplot.plot(smoothed_reward_record, style, label=model_path)

pyplot.legend()
pyplot.show()
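# The smoothing loop above is a trailing moving average; an equivalent
# vectorized form (illustration only) using numpy:
import numpy

record = numpy.arange(10.)
k = 3
smoothed = numpy.convolve(record, numpy.ones(k) / k, mode='valid')
assert len(smoothed) == len(record) - k + 1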
def __init__(self, data_file, which_set, batch_size, sequence=1,
             normalise=None, shuffle=False, start=None, stop=None,
             axes=['b', 0, 1, 'c']):
    self.args = locals()

    assert which_set in ['train', 'valid']

    def dimshuffle(b01c):
        default = ('b', 0, 1, 'c')
        return b01c.transpose(*[default.index(axis) for axis in axes])

    topo_view = N.load(data_file)

    if start is not None:
        assert stop is not None
        assert start >= 0
        assert stop > start
        assert ((stop - start) % batch_size) == 0
        # report the shape of the loaded array, not self.X, which does
        # not exist yet at this point
        if stop > topo_view.shape[0]:
            raise ValueError('stop=' + str(stop) + '>' +
                             'm=' + str(topo_view.shape[0]))
        topo_view = topo_view[start:stop, :]
        if topo_view.shape[0] != stop - start:
            raise ValueError("data.shape[0]: %d. start: %d stop: %d"
                             % (topo_view.shape[0], start, stop))

    if normalise == 1:
        topo_mean = topo_view.mean()
        topo_std = topo_view.std()
    elif normalise == 2:
        topo_mean = topo_view.mean(axis=0)
        topo_std = topo_view.std(axis=0)
    # only normalize when a mean/std was actually computed; the default
    # normalise=None leaves the data untouched
    if normalise is not None:
        topo_view = (topo_view - topo_mean) / topo_std

    if sequence != 1:
        temp = topo_view
        topo_view = np.zeros([temp.shape[0], sequence, temp.shape[1]])
        for i in range(0, temp.shape[0] - sequence):
            topo_view[i, :, :] = temp[i:i + sequence, :].reshape(
                1, sequence, temp.shape[1])
        del temp
    else:
        topo_view = topo_view.reshape(topo_view.shape[0], 1,
                                      topo_view.shape[1])

    m, r, c = topo_view.shape
    assert r == sequence
    topo_view = topo_view.reshape(m, r, c, 1)

    if shuffle:
        self.shuffle_rng = make_np_rng(None, [1, 2, 3],
                                       which_method="shuffle")
        for i in xrange(topo_view.shape[0]):
            j = self.shuffle_rng.randint(m)
            # Copy ensures that memory is not aliased.
            tmp = topo_view[i, :, :, :].copy()
            topo_view[i, :, :, :] = topo_view[j, :, :, :]
            topo_view[j, :, :, :] = tmp

    super(CustomLoader, self).__init__(topo_view=dimshuffle(topo_view))

    assert not N.any(N.isnan(self.X))
def __init__(self, which_set, shuffle=False, start=None, stop=None,
             axes=['b', 0, 1, 'c'], preprocessor=None,
             fit_preprocessor=False, fit_test_preprocessor=False):
    self.args = locals()

    if which_set not in ['train', 'valid', 'test']:
        raise ValueError('Unrecognized which_set value "%s".' %
                         (which_set, ) + '". Valid values are ' +
                         '["train", "valid", "test"].')

    def dimshuffle(b01c):
        default = ('b', 0, 1, 'c')
        return b01c.transpose(*[default.index(axis) for axis in axes])

    if control.get_load_data():
        path = "${PYLEARN2_DATA_PATH}/binarized_mnist/binarized_mnist_" + \
               which_set + ".npy"
        im_path = serial.preprocess(path)

        # Locally cache the files before reading them
        datasetCache = cache.datasetCache
        im_path = datasetCache.cache_file(im_path)

        try:
            X = serial.load(im_path)
        except IOError:
            raise NotInstalledError("BinarizedMNIST data files cannot be "
                                    "found in ${PYLEARN2_DATA_PATH}. Run "
                                    "pylearn2/scripts/datasets/"
                                    "download_binarized_mnist.py to get "
                                    "the data")
    else:
        if which_set == 'train':
            size = 50000
        else:
            size = 10000
        X = numpy.random.binomial(n=1, p=0.5, size=(size, 28 ** 2))

    m, d = X.shape
    assert d == 28 ** 2
    if which_set == 'train':
        assert m == 50000
    else:
        assert m == 10000

    if shuffle:
        self.shuffle_rng = make_np_rng(None, [1, 2, 3],
                                       which_method="shuffle")
        for i in xrange(X.shape[0]):
            j = self.shuffle_rng.randint(m)
            # Copy ensures that memory is not aliased.
            tmp = X[i, :].copy()
            X[i, :] = X[j, :]
            X[j, :] = tmp

    super(BinarizedMNIST, self).__init__(
        X=X, view_converter=DefaultViewConverter(shape=(28, 28, 1)))

    assert not numpy.any(numpy.isnan(self.X))

    if start is not None:
        assert start >= 0
        if stop > self.X.shape[0]:
            raise ValueError('stop=' + str(stop) + '>' +
                             'm=' + str(self.X.shape[0]))
        assert stop > start
        self.X = self.X[start:stop, :]
        if self.X.shape[0] != stop - start:
            raise ValueError("X.shape[0]: %d. start: %d stop: %d"
                             % (self.X.shape[0], start, stop))

    if which_set == 'test':
        assert fit_test_preprocessor is None or \
            (fit_preprocessor == fit_test_preprocessor)

    if self.X is not None and preprocessor:
        preprocessor.apply(self, fit_preprocessor)
def main(options, positional_args):
    """
    .. todo::

        WRITEME
    """
    assert len(positional_args) == 1

    path, = positional_args

    out = options.out
    rescale = options.rescale

    if rescale == 'none':
        global_rescale = False
        patch_rescale = False
    elif rescale == 'global':
        global_rescale = True
        patch_rescale = False
    elif rescale == 'individual':
        global_rescale = False
        patch_rescale = True
    else:
        assert False

    if path.endswith('.pkl'):
        from pylearn2.utils import serial
        obj = serial.load(path)
    elif path.endswith('.yaml'):
        print('Building dataset from yaml...')
        obj = yaml_parse.load_path(path)
        print('...done')
    else:
        obj = yaml_parse.load(path)

    rows = options.rows
    cols = options.cols

    if hasattr(obj, 'get_batch_topo'):
        # obj is a Dataset
        dataset = obj
        examples = dataset.get_batch_topo(rows * cols)
    else:
        # obj is a Model
        model = obj
        from theano.sandbox.rng_mrg import MRG_RandomStreams \
            as RandomStreams
        theano_rng = RandomStreams(42)
        design_examples_var = model.random_design_matrix(
            batch_size=rows * cols, theano_rng=theano_rng)
        from theano import function
        print('compiling sampling function')
        f = function([], design_examples_var)
        print('sampling')
        design_examples = f()
        print('loading dataset')
        dataset = yaml_parse.load(model.dataset_yaml_src)
        examples = dataset.get_topological_view(design_examples)

    norms = N.asarray([N.sqrt(N.sum(N.square(examples[i, :])))
                       for i in xrange(examples.shape[0])])
    print('norms of examples: ')
    print('\tmin: ', norms.min())
    print('\tmean: ', norms.mean())
    print('\tmax: ', norms.max())

    print('range of elements of examples', (examples.min(), examples.max()))
    print('dtype: ', examples.dtype)

    examples = dataset.adjust_for_viewer(examples)

    if global_rescale:
        examples /= N.abs(examples).max()

    if len(examples.shape) != 4:
        print('sorry, view_examples.py only supports image examples for '
              'now.')
        print('this dataset has ' + str(len(examples.shape) - 2) +
              ' topological dimensions')
        quit(-1)

    is_color = False

    assert examples.shape[3] == 2

    print(examples.shape[1:3])

    pv = patch_viewer.PatchViewer((rows, cols * 2), examples.shape[1:3],
                                  is_color=is_color)

    for i in xrange(rows * cols):
        # Load patches in backwards order for easier cross-eyed viewing
        # (Ian can't do the magic eye thing where you focus your eyes past
        # the screen, must focus eyes in front of screen)
        pv.add_patch(examples[i, :, :, 1], activation=0.0,
                     rescale=patch_rescale)
        pv.add_patch(examples[i, :, :, 0], activation=0.0,
                     rescale=patch_rescale)

    if out is None:
        pv.show()
    else:
        pv.save(out)
def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
                       scale_rows_to_unit_interval=True,
                       output_pixel_vals=True):
    """
    Transform an array with one flattened image per row, into an array in
    which images are reshaped and laid out like tiles on a floor.

    This function is useful for visualizing datasets whose rows are
    images, and also columns of matrices for transforming those rows
    (such as the first layer of a neural net).

    Parameters
    ----------
    X : numpy.ndarray
        2-d ndarray or 4 tuple of 2-d ndarrays or None for channels,
        in which every row is a flattened image.
    img_shape : 2-tuple of ints
        The first component is the height of each image, the second
        component is the width.
    tile_shape : 2-tuple of ints
        The number of images to tile in (row, columns) form.
    scale_rows_to_unit_interval : bool
        Whether or not the values need to be scaled to [0, 1] before
        being plotted.
    output_pixel_vals : bool
        Whether or not the output should be pixel values (int8) or floats.

    Returns
    -------
    y : 2d-ndarray
        The return value has the same dtype as X, and is suitable for
        viewing as an image with PIL.Image.fromarray.
    """
    assert len(img_shape) == 2
    assert len(tile_shape) == 2
    assert len(tile_spacing) == 2

    # The expression below can be re-written in a more C style as
    # follows :
    #
    # out_shape = [0,0]
    # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] -
    #                tile_spacing[0]
    # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] -
    #                tile_spacing[1]
    out_shape = [(ishp + tsp) * tshp - tsp
                 for ishp, tshp, tsp in zip(img_shape, tile_shape,
                                            tile_spacing)]

    if isinstance(X, tuple):
        assert len(X) == 4
        # Create an output np ndarray to store the image
        if output_pixel_vals:
            out_array = np.zeros((out_shape[0], out_shape[1], 4),
                                 dtype='uint8')
        else:
            # Note: a tuple has no dtype of its own; use the first
            # channel's dtype.
            out_array = np.zeros((out_shape[0], out_shape[1], 4),
                                 dtype=X[0].dtype)

        # colors default to 0, alpha defaults to 1 (opaque)
        if output_pixel_vals:
            channel_defaults = [0, 0, 0, 255]
        else:
            channel_defaults = [0., 0., 0., 1.]

        for i in xrange(4):
            if X[i] is None:
                # if channel is None, fill it with zeros of the correct
                # dtype
                dt = out_array.dtype
                if output_pixel_vals:
                    dt = 'uint8'
                out_array[:, :, i] = np.zeros(out_shape, dtype=dt) + \
                    channel_defaults[i]
            else:
                # use a recurrent call to compute the channel and store it
                # in the output
                out_array[:, :, i] = tile_raster_images(
                    X[i], img_shape, tile_shape, tile_spacing,
                    scale_rows_to_unit_interval, output_pixel_vals)
        return out_array

    else:
        # if we are dealing with only one channel
        H, W = img_shape
        Hs, Ws = tile_spacing

        # generate a matrix to store the output
        dt = X.dtype
        if output_pixel_vals:
            dt = 'uint8'
        out_array = np.zeros(out_shape, dtype=dt)

        for tile_row in xrange(tile_shape[0]):
            for tile_col in xrange(tile_shape[1]):
                if tile_row * tile_shape[1] + tile_col < X.shape[0]:
                    this_x = X[tile_row * tile_shape[1] + tile_col]
                    if scale_rows_to_unit_interval:
                        # if we should scale values to be between 0 and 1
                        # do this by calling the `scale_to_unit_interval`
                        # function
                        this_img = scale_to_unit_interval(
                            this_x.reshape(img_shape))
                    else:
                        this_img = this_x.reshape(img_shape)
                    # add the slice to the corresponding position in the
                    # output array
                    c = 1
                    if output_pixel_vals:
                        c = 255
                    out_array[tile_row * (H + Hs):tile_row * (H + Hs) + H,
                              tile_col * (W + Ws):tile_col * (W + Ws) + W] \
                        = this_img * c
        return out_array
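# A minimal usage sketch of the tuple branch above (illustration only):
# passing (R, G, B, None) channels yields an RGBA uint8 array whose alpha
# channel is filled from channel_defaults.
R = np.random.RandomState(0).rand(4, 25)
tiles = tile_raster_images((R, R, R, None), img_shape=(5, 5),
                           tile_shape=(2, 2),
                           scale_rows_to_unit_interval=False)
assert tiles.shape == (10, 10, 4)
assert np.all(tiles[:, :, 3] == 255)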
def evaluate(imshp, kshp, offset=(1, 1), nkern=1, mode='valid', ws=True):
    """
    Build a sparse matrix which can be used for performing...

    * convolution: in this case, the dot product of this matrix with the
      input images will generate a stack of image patches. Convolution is
      then a tensordot operation of the filters and the patch stack.
    * sparse local connections: in this case, the sparse matrix allows us
      to operate the weight matrix as if it were fully-connected. The
      structured-dot with the input image gives the output for the
      following layer.

    Parameters
    ----------
    imshp : tuple
        Shape of the input images: either (height, width) or
        (n input features, height, width).
    kshp : tuple
        Shape of the kernel to apply (smaller than the image).
    offset : tuple of int
        Offset parameter. In the case of no weight sharing, gives the
        pixel offset between two receptive fields. With weight sharing,
        gives the offset between the top-left pixels of the generated
        patches.
    nkern : int
        Number of kernels (1 when weight sharing).
    mode : str
        'valid' generates output only where kernel and image fully
        overlap. 'full' gives the full convolution obtained by
        zero-padding the input.
    ws : bool
        True if weight sharing, False otherwise.

    Returns
    -------
    rval : tuple(indices, indptr, logical_shape, sp_type, out_img_shp)
        The structure of a sparse matrix, and the logical dimensions of
        the image which will be the result of filtering.
    """
    N = numpy

    dx, dy = offset

    # imshp contains either 2 entries (height, width) or 3
    # (nfeatures, h, w); in the first case, default nfeatures to 1
    if N.size(imshp) == 2:
        imshp = (1,) + imshp
    inshp = N.array(imshp)
    kshp = N.array(kshp)
    ksize = N.prod(kshp)

    kern = ksize - 1 - N.arange(ksize)

    # size of output image if doing proper convolution
    # (mode='full', dx=dy=0); outshp is the actual output shape given the
    # parameters
    fulloutshp = inshp[1:] + kshp - 1
    s = -1 if mode == 'valid' else 1
    outshp = N.int64(N.ceil((inshp[1:] + s * kshp - s * 1) /
                            N.array([dy, dx], dtype='float')))
    if any(outshp <= 0):
        err = ('Invalid kernel %s and/or step size %s for given input '
               'shape %s' % (kshp, (dx, dy), inshp))
        raise ValueError(err)

    outsize = N.prod(outshp)
    insize = N.prod(inshp)

    # range of output units over which to iterate
    if mode == 'valid':
        lbound = N.array([kshp[0] - 1, kshp[1] - 1])
        ubound = lbound + (inshp[1:] - kshp + 1)
    else:
        lbound = N.zeros(2)
        ubound = fulloutshp

    # coordinates of image in "fulloutshp" coordinates
    topleft = N.array([kshp[0] - 1, kshp[1] - 1])
    # bound when counting the receptive field
    botright = topleft + inshp[1:]

    # sparse matrix specifics...
    if ws:
        spmatshp = (outsize * N.prod(kshp) * inshp[0], insize)
    else:
        spmatshp = (nkern * outsize, insize)
    spmat = scipy_sparse.lil_matrix(spmatshp)

    # loop over output image pixels
    z, zz = 0, 0

    # incremented every time we write something to the sparse matrix;
    # this is used to track the ordering of filter tap coefficients in
    # sparse column ordering
    tapi, ntaps = 0, 0

    # Note: looping over the number of kernels could've been done more
    # efficiently as the last step (when writing to spmat). However, this
    # messes up the ordering of the column values (the order in which you
    # write the values determines how the vectorized data will get used
    # later on).
    for fmapi in range(inshp[0]):  # loop over input features
        # loop over number of kernels (nkern=1 for weight sharing)
        for n in range(nkern):
            # FOR EACH OUTPUT PIXEL...
            # loop over output image height
            for oy in N.arange(lbound[0], ubound[0], dy):
                # loop over output image width
                for ox in N.arange(lbound[1], ubound[1], dx):
                    # kern[l] is the filter value to apply at (oj, oi)
                    # for (iy, ix)
                    l = 0
                    # ITERATE OVER INPUT UNITS IN RECEPTIVE FIELD
                    for ky in oy + N.arange(kshp[0]):
                        for kx in ox + N.arange(kshp[1]):
                            # verify that we are still within image
                            # boundaries. Equivalent to zero-padding of
                            # the input image
                            if (all((ky, kx) >= topleft) and
                                    all((ky, kx) < botright)):
                                # convert to "valid" input space coords,
                                # used to determine the column index to
                                # write to in the sparse matrix
                                iy, ix = N.array((ky, kx)) - topleft
                                # determine the raster index of the input
                                # pixel, taking into account multiple
                                # input features
                                col = (iy * inshp[2] + ix +
                                       fmapi * N.prod(inshp[1:]))

                                # convert oy, ox values to output space
                                # coordinates
                                if mode == 'full':
                                    (y, x) = (oy, ox)
                                else:
                                    (y, x) = (oy, ox) - topleft
                                # taking into account step size
                                (y, x) = N.array([y, x]) / (dy, dx)

                                # convert to row index of sparse matrix
                                if ws:
                                    row = ((y * outshp[1] + x) *
                                           inshp[0] * ksize + l +
                                           fmapi * ksize)
                                else:
                                    row = y * outshp[1] + x

                                # Store something at that location in the
                                # sparse matrix. The written value is only
                                # useful for the sparse case. It will
                                # determine the way kernel taps are mapped
                                # onto the sparse columns (idea of kernel
                                # map). The n*... term is only for the
                                # sparse case.
                                spmat[row + n * outsize, col] = tapi + 1

                                # total number of active taps
                                # (used for kmap)
                                ntaps += 1

                            # absolute tap index (total number of taps)
                            tapi += 1
                            # move on to the next filter tap
                            l += 1

    if spmat.format != 'csc':
        spmat = spmat.tocsc().ensure_sorted_indices()
    else:
        # BUG ALERT: scipy 0.6 has a bug where data and indices are
        # written in reverse column ordering. An explicit call to
        # ensure_sorted_indices removes this problem.
        spmat = spmat.ensure_sorted_indices()

    if ws:
        kmap = None
    else:
        kmap = N.zeros(ntaps, dtype='int')
        k = 0
        for j in xrange(spmat.shape[1]):
            for i_idx in xrange(spmat.indptr[j], spmat.indptr[j + 1]):
                if spmat.data[i_idx] != 0:
                    # this is == spmat[i, j] - 1
                    kmap[k] = spmat.data[i_idx] - 1
                    k += 1

    # when in valid mode, it is more efficient to store in sparse row
    # TODO: need to implement structured dot for csr matrix
    assert spmat.format == 'csc'
    sptype = 'csc'
    # sptype = 'csr' if mode == 'valid' else 'csc'
    use_csr_type = 0
    if use_csr_type and mode == 'valid':
        spmat = spmat.tocsr()

    rval = (spmat.indices[:spmat.size], spmat.indptr, spmatshp, sptype,
            outshp)
    rval += (kmap,) if kmap is not None else ()

    return rval
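# A hedged usage sketch for evaluate(); the sizes are arbitrary
# illustration values, and running it needs the legacy scipy API
# (ensure_sorted_indices) that this module targets. For an 8x8
# single-feature image and a 3x3 kernel in 'valid' mode with unit
# offsets, the logical output image is 6x6 and the patch-extraction
# matrix has one row per (output pixel, tap) pair.
def _demo_evaluate():
    indices, indptr, spmatshp, sptype, outshp = evaluate((8, 8), (3, 3),
                                                         mode='valid')
    assert tuple(outshp) == (6, 6)
    assert sptype == 'csc'
    assert spmatshp == (6 * 6 * 9, 8 * 8)
    return indices, indptr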
def main():
    """
    .. todo::

        WRITEME
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out")
    parser.add_argument("model_paths", nargs='+')
    options = parser.parse_args()
    model_paths = options.model_paths

    if options.out is not None:
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    print('generating names...')
    model_names = [model_path.replace('.pkl', '!')
                   for model_path in model_paths]
    model_names = unique_substrings(model_names, min_size=10)
    model_names = [model_name.replace('!', '')
                   for model_name in model_names]
    print('...done')

    channels = {}

    for i, arg in enumerate(model_paths):
        try:
            model = serial.load(arg)
        except Exception:
            if arg.endswith('.yaml'):
                print(arg + " is a yaml config file, you need to load a "
                      "trained model.", file=sys.stderr)
                quit(-1)
            raise
        this_model_channels = model.monitor.channels

        if len(model_paths) > 1:
            postfix = ":" + model_names[i]
        else:
            postfix = ""

        for channel in this_model_channels:
            channels[channel + postfix] = this_model_channels[channel]
        del model
        gc.collect()

    while True:
        # Make a list of short codes for each channel so the user can
        # specify them easily
        tag_generator = _TagGenerator()
        codebook = {}
        sorted_codes = []
        for channel_name in sorted(channels,
                                   key=number_aware_alphabetical_key):
            code = tag_generator.get_tag()
            codebook[code] = channel_name
            codebook['<' + channel_name + '>'] = channel_name
            sorted_codes.append(code)

        x_axis = 'example'
        print('set x_axis to example')

        if len(channels.values()) == 0:
            print("there are no channels to plot")
            break

        # If there is more than one channel in the monitor, ask which ones
        # to plot
        prompt = len(channels.values()) > 1

        if prompt:

            # Display the codebook
            for code in sorted_codes:
                print(code + '. ' + codebook[code])

            print()

            print("Put e, b, s or h in the list somewhere to plot "
                  "epochs, batches, seconds, or hours, respectively.")
            response = input('Enter a list of channels to plot '
                             '(example: A, C,F-G, h, <test_err>) or q to '
                             'quit or o for options: ')

            if response == 'o':
                print('1: smooth all channels')
                print('any other response: do nothing, go back to '
                      'plotting')
                response = input('Enter your choice: ')
                if response == '1':
                    for channel in channels.values():
                        k = 5
                        new_val_record = []
                        for i in xrange(len(channel.val_record)):
                            new_val = 0.
                            count = 0.
                            for j in xrange(max(0, i - k), i + 1):
                                new_val += channel.val_record[j]
                                count += 1.
                            new_val_record.append(new_val / count)
                        channel.val_record = new_val_record
                continue

            if response == 'q':
                break

            # Remove spaces
            response = response.replace(' ', '')

            # Split into list
            codes = response.split(',')

            final_codes = set([])

            for code in codes:
                if code == 'e':
                    x_axis = 'epoch'
                    continue
                elif code == 'b':
                    x_axis = 'batche'
                elif code == 's':
                    x_axis = 'second'
                elif code == 'h':
                    x_axis = 'hour'
                elif code.startswith('<'):
                    assert code.endswith('>')
                    final_codes.add(code)
                elif code.find('-') != -1:
                    # The current list element is a range of codes
                    rng = code.split('-')
                    if len(rng) != 2:
                        print("Input not understood: " + code)
                        quit(-1)

                    found = False
                    for i in xrange(len(sorted_codes)):
                        if sorted_codes[i] == rng[0]:
                            found = True
                            break

                    if not found:
                        print("Invalid code: " + rng[0])
                        quit(-1)

                    found = False
                    for j in xrange(i, len(sorted_codes)):
                        if sorted_codes[j] == rng[1]:
                            found = True
                            break

                    if not found:
                        print("Invalid code: " + rng[1])
                        quit(-1)

                    final_codes = final_codes.union(
                        set(sorted_codes[i:j + 1]))
                else:
                    # The current list element is just a single code
                    final_codes = final_codes.union(set([code]))
            # end for code in codes
        else:
            final_codes = set(codebook.keys())

        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        styles = list(colors)
        styles += [color + '--' for color in colors]
        styles += [color + ':' for color in colors]

        fig = plt.figure()
        ax = plt.subplot(1, 1, 1)

        # plot the requested channels
        for idx, code in enumerate(sorted(final_codes)):

            channel_name = codebook[code]
            channel = channels[channel_name]

            y = np.asarray(channel.val_record)

            if contains_nan(y):
                print(channel_name + ' contains NaNs')

            if contains_inf(y):
                print(channel_name + ' contains infinite values')

            if x_axis == 'example':
                x = np.asarray(channel.example_record)
            elif x_axis == 'batche':
                x = np.asarray(channel.batch_record)
            elif x_axis == 'epoch':
                try:
                    x = np.asarray(channel.epoch_record)
                except AttributeError:
                    # older saved monitors won't have epoch_record
                    x = np.arange(len(channel.batch_record))
            elif x_axis == 'second':
                x = np.asarray(channel.time_record)
            elif x_axis == 'hour':
                x = np.asarray(channel.time_record) / 3600.
            else:
                assert False

            ax.plot(x,
                    y,
                    styles[idx % len(styles)],
                    marker='.',  # add point markers to lines
                    label=channel_name)

        plt.xlabel('# ' + x_axis + 's')
        ax.ticklabel_format(scilimits=(-3, 3), axis='both')

        handles, labels = ax.get_legend_handles_labels()
        lgd = ax.legend(handles, labels, loc='upper center',
                        bbox_to_anchor=(0.5, -0.1))

        # 0.046 is the size of 1 legend box
        fig.subplots_adjust(bottom=0.11 + 0.046 * len(final_codes))

        if options.out is None:
            plt.show()
        else:
            plt.savefig(options.out)

        if not prompt:
            break
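# The interactive 'smooth' option above is a trailing moving average over
# a window of k + 1 points. The same computation as a standalone helper,
# for reference (the function name is ours, not part of the script):
def _smooth_val_record(vals, k=5):
    vals = np.asarray(vals, dtype=float)
    out = np.empty_like(vals)
    for i in xrange(len(vals)):
        out[i] = vals[max(0, i - k):i + 1].mean()
    return out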
def make_viewer(mat, grid_shape=None, patch_shape=None,
                activation=None, pad=None, is_color=False, rescale=True):
    """
    Given filters in rows, guesses dimensions of patches and nice
    dimensions for the PatchViewer and returns a PatchViewer containing
    visualizations of the filters.

    Parameters
    ----------
    mat : ndarray
        Values should lie in [-1, 1] if `rescale` is False. 0 always
        indicates medium gray, with negative values drawn blacker and
        positive values drawn whiter. A matrix with each row being a
        different image patch, OR a 4D tensor in ('b', 0, 1, 'c') format.
        If a matrix, we assume it was flattened using the same procedure
        as a ('b', 0, 1, 'c') DefaultViewConverter uses.
    grid_shape : tuple, optional
        A tuple of two ints specifying the shape of the grid in the
        PatchViewer, in (rows, cols) format. If not specified, this
        function does its best to choose an aesthetically pleasing value.
    patch_shape : tuple, optional
        A tuple of two ints specifying the shape of the patch. If `mat`
        is 4D, this function gets the patch shape from the shape of
        `mat`. If `mat` is 2D and patch_shape is not specified, this
        function assumes the patches are perfectly square.
    activation : iterable
        An iterable collection describing some kind of activation value
        associated with each patch. This is indicated with a border
        around the patch whose color intensity increases with activation
        value. The individual activation values may be single floats to
        draw one border, or iterable collections of floats to draw
        multiple borders with differing intensities around the patch.
    pad : int, optional
        The amount of padding to add between patches in the displayed
        image.
    is_color : bool
        If True, assume the images are in color. Not needed if `mat` is
        in ('b', 0, 1, 'c') format, since we can just look at its
        shape[-1].
    rescale : bool
        If True, rescale each patch so that its highest magnitude pixel
        reaches a value of either -1 or 1 depending on the sign of that
        pixel.

    Returns
    -------
    patch_viewer : PatchViewer
        A PatchViewer containing the patches stored in `mat`.
    """
    num_channels = 1
    if is_color:
        num_channels = 3

    if grid_shape is None:
        grid_shape = PatchViewer.pick_shape(mat.shape[0])
    if mat.ndim > 2:
        patch_shape = mat.shape[1:3]
        topo_view = mat
        num_channels = mat.shape[3]
        is_color = num_channels > 1
    else:
        if patch_shape is None:
            assert mat.shape[1] % num_channels == 0
            patch_shape = PatchViewer.pick_shape(mat.shape[1] //
                                                 num_channels,
                                                 exact=True)
        assert mat.shape[1] == (patch_shape[0] *
                                patch_shape[1] *
                                num_channels)
        topo_shape = (patch_shape[0], patch_shape[1], num_channels)
        view_converter = DefaultViewConverter(topo_shape)
        topo_view = view_converter.design_mat_to_topo_view(mat)
    rval = PatchViewer(grid_shape, patch_shape, pad=pad,
                       is_color=is_color)
    for i in xrange(mat.shape[0]):
        if activation is not None:
            if hasattr(activation[0], '__iter__'):
                act = [a[i] for a in activation]
            else:
                act = activation[i]
        else:
            act = None

        patch = topo_view[i, :]
        rval.add_patch(patch, rescale=rescale, activation=act)

    return rval
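# A minimal usage sketch for make_viewer: ten random 5x5 grayscale
# patches in a 2x5 grid. The random data is only an illustration.
def _demo_make_viewer():
    import numpy
    rng = numpy.random.RandomState(0)
    mat = rng.randn(10, 25)  # ten flattened 5x5 patches
    pv = make_viewer(mat, grid_shape=(2, 5), patch_shape=(5, 5))
    return pv  # call pv.show() or pv.save(path) to display it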
def stochastic_max_pool_x(x, image_shape, pool_shape=(2, 2),
                          pool_stride=(1, 1), rng=None):
    """
    Stochastic max pooling on a tensor in (batch size, channels, rows,
    cols) format; companion to `stochastic_max_pool_bc01`, with the pool
    geometry given as keyword arguments.

    Parameters
    ----------
    x : theano 4-tensor
        in format (batch size, channels, rows, cols)
    image_shape : tuple
        avoid doing some of the arithmetic in theano
    pool_shape : tuple
        shape of the pool region (rows, cols)
    pool_stride : tuple
        strides between pooling regions (row stride, col stride)
    rng : theano random stream
    """
    r, c = image_shape
    pr, pc = pool_shape
    rs, cs = pool_stride

    # record the pool geometry in module-level globals
    global pool_size
    pool_size = pool_shape
    global stride_size
    stride_size = pool_stride

    batch = x.shape[0]
    channel = x.shape[1]
    rng = make_theano_rng(rng, 2022, which_method='multinomial')

    # Compute starting row of the last pool
    last_pool_r = last_pool(r, pr, rs) * rs
    # Compute number of rows needed in image for all indexes to work out
    required_r = last_pool_r + pr

    last_pool_c = last_pool(c, pc, cs) * cs
    required_c = last_pool_c + pc

    # final result shape
    res_r = int(numpy.floor(last_pool_r / rs)) + 1
    res_c = int(numpy.floor(last_pool_c / cs)) + 1

    # padding: tensor.alloc(value, *shape) allocates a new tensor filled
    # with `value`
    padded = tensor.alloc(0.0, batch, channel, required_r, required_c)
    # tensor.set_subtensor assigns a tensor to a subtensor of another
    # tensor
    x = tensor.set_subtensor(padded[:, :, 0:r, 0:c], x)

    # unraveling
    window = tensor.alloc(0.0, batch, channel, res_r, res_c, pr, pc)

    # initializing window with proper values
    for row_within_pool in xrange(pr):
        row_stop = last_pool_r + row_within_pool + 1
        for col_within_pool in xrange(pc):
            col_stop = last_pool_c + col_within_pool + 1
            win_cell = x[:, :, row_within_pool:row_stop:rs,
                         col_within_pool:col_stop:cs]
            window = tensor.set_subtensor(
                window[:, :, :, :, row_within_pool, col_within_pool],
                win_cell)

    # find the norm of each pooling window (sum over the last two axes)
    norm = window.sum(axis=[4, 5])
    # tensor.eq(a, b) is elementwise a == b; tensor.switch(cond, ift, iff)
    # selects between its branches elementwise. Together they replace a
    # zero norm with 1.0 so the division below is safe.
    norm = tensor.switch(tensor.eq(norm, 0.0), 1.0, norm)
    # convert activation values to probabilities using p_i = a_i / sum(a_i)
    norm = window / norm.dimshuffle(0, 1, 2, 3, 'x', 'x')

    # get prob
    prob = rng.multinomial(pvals=norm.reshape((batch * channel *
                                               res_r * res_c,
                                               pr * pc)),
                           dtype='float32')
    # select
    res = (window * prob.reshape((batch, channel, res_r, res_c,
                                  pr, pc))).max(axis=5).max(axis=4)

    return res
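# What the norm/prob/select steps above compute, shown in plain numpy for
# a single pooling window at training time: nonnegative activations become
# probabilities p_i = a_i / sum(a_i) and one activation is sampled from
# that multinomial. This is an illustrative helper only, not used by the
# theano code.
def _demo_stochastic_pool_window(window, rng=None):
    rng = numpy.random.RandomState(0) if rng is None else rng
    a = numpy.asarray(window, dtype='float64').ravel()
    s = a.sum()
    if s == 0.0:
        # an all-zero window pools to zero
        return 0.0
    idx = rng.multinomial(1, a / s).argmax()
    return a[idx]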
def __init__(self, which_set, center=False, shuffle=False, binarize=False,
             start=None, stop=None, axes=['b', 0, 1, 'c'],
             preprocessor=None, fit_preprocessor=False,
             fit_test_preprocessor=False):
    self.args = locals()

    if which_set not in ['train', 'test']:
        if which_set == 'valid':
            raise ValueError(
                "There is no such thing as the MNIST validation set. "
                "MNIST consists of 60,000 train examples and 10,000 "
                "test examples. If you wish to use a validation set "
                "you should divide the train set yourself. The "
                "pylearn2 dataset implements and will only ever "
                "implement the standard train / test split used in "
                "the literature.")
        raise ValueError(
            'Unrecognized which_set value "%s". ' % (which_set,) +
            'Valid values are ["train", "test"].')

    def dimshuffle(b01c):
        """
        .. todo::

            WRITEME
        """
        default = ('b', 0, 1, 'c')
        return b01c.transpose(*[default.index(axis) for axis in axes])

    if control.get_load_data():
        path = "${PYLEARN2_DATA_PATH}/sign24/"
        if which_set == 'train':
            im_path = path + 'train-images-idx3-ubyte'
            label_path = path + 'train-labels-idx1-ubyte'
        else:
            assert which_set == 'test'
            im_path = path + 't10k-images-idx3-ubyte'
            label_path = path + 't10k-labels-idx1-ubyte'

        # Path substitution done here in order to make the lower-level
        # mnist_ubyte.py as stand-alone as possible (for reuse in, e.g.,
        # the Deep Learning Tutorials, or in another package).
        im_path = serial.preprocess(im_path)
        label_path = serial.preprocess(label_path)

        # Locally cache the files before reading them
        datasetCache = cache.datasetCache
        im_path = datasetCache.cache_file(im_path)
        label_path = datasetCache.cache_file(label_path)

        topo_view = read_mnist_images(im_path, dtype='float32')
        y = np.atleast_2d(read_mnist_labels(label_path)).T
    else:
        if which_set == 'train':
            size = 15
        elif which_set == 'test':
            size = 5
        else:
            raise ValueError(
                'Unrecognized which_set value "%s". ' % (which_set,) +
                'Valid values are ["train", "test"].')
        topo_view = np.random.rand(size, 28, 28)
        y = np.random.randint(0, 10, (size, 1))

    if binarize:
        topo_view = (topo_view > 0.5).astype('float32')

    y_labels = 24

    m, r, c = topo_view.shape
    assert r == 28
    assert c == 28
    topo_view = topo_view.reshape(m, r, c, 1)

    if which_set == 'train':
        assert m == 3576
    elif which_set == 'test':
        assert m == 1176
    else:
        assert False

    if center:
        topo_view -= topo_view.mean(axis=0)

    if shuffle:
        self.shuffle_rng = make_np_rng(None, [1, 2, 3],
                                       which_method="shuffle")
        for i in xrange(topo_view.shape[0]):
            j = self.shuffle_rng.randint(m)
            # Copy ensures that memory is not aliased.
            tmp = topo_view[i, :, :, :].copy()
            topo_view[i, :, :, :] = topo_view[j, :, :, :]
            topo_view[j, :, :, :] = tmp

            tmp = y[i:i + 1].copy()
            y[i] = y[j]
            y[j] = tmp

    super(MNIST, self).__init__(topo_view=dimshuffle(topo_view), y=y,
                                axes=axes, y_labels=y_labels)

    assert not N.any(N.isnan(self.X))

    if start is not None:
        assert start >= 0
        if stop > self.X.shape[0]:
            raise ValueError('stop=' + str(stop) + '>' +
                             'm=' + str(self.X.shape[0]))
        assert stop > start
        self.X = self.X[start:stop, :]
        if self.X.shape[0] != stop - start:
            raise ValueError("X.shape[0]: %d. start: %d stop: %d"
                             % (self.X.shape[0], start, stop))
        if len(self.y.shape) > 1:
            self.y = self.y[start:stop, :]
        else:
            self.y = self.y[start:stop]
        assert self.y.shape[0] == stop - start

    if which_set == 'test':
        assert fit_test_preprocessor is None or \
            (fit_preprocessor == fit_test_preprocessor)

    if self.X is not None and preprocessor:
        preprocessor.apply(self, fit_preprocessor)
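# A hedged usage sketch for the constructor above (assuming it belongs to
# the MNIST class named in the super() call): take the first 100 training
# examples. Loading requires ${PYLEARN2_DATA_PATH}/sign24/ to exist; the
# start/stop values are illustrative.
def _demo_dataset_slice():
    ds = MNIST(which_set='train', start=0, stop=100)
    assert ds.X.shape[0] == 100
    return ds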
def test_revisit():

    # Test that each call to monitor revisits exactly the same data

    BATCH_SIZE = 3
    MAX_BATCH_SIZE = 12
    BATCH_SIZE_STRIDE = 3
    NUM_BATCHES = 10
    num_examples = NUM_BATCHES * BATCH_SIZE

    monitoring_dataset = ArangeDataset(num_examples)

    for mon_batch_size in xrange(BATCH_SIZE, MAX_BATCH_SIZE + 1,
                                 BATCH_SIZE_STRIDE):
        nums = [1, 3, int(num_examples / mon_batch_size), None]
        for mode in sorted(_iteration_schemes):
            if mode == 'even_sequences':
                # The even_sequences iterator does not support specifying
                # a fixed number of minibatches, so skip it.
                continue
            for num_mon_batches in nums:
                if num_mon_batches is None and mode in ['random_uniform',
                                                        'random_slice']:
                    continue

                if (has_uniform_batch_size(mode) and
                        num_mon_batches is not None and
                        num_mon_batches * mon_batch_size > num_examples):

                    num_mon_batches = int(num_examples /
                                          float(mon_batch_size))

                model = DummyModel(1)
                monitor = Monitor.get_monitor(model)

                try:
                    monitor.add_dataset(monitoring_dataset, mode,
                                        batch_size=mon_batch_size,
                                        num_batches=num_mon_batches)
                except TypeError:
                    monitor.add_dataset(monitoring_dataset, mode,
                                        batch_size=mon_batch_size,
                                        num_batches=num_mon_batches,
                                        seed=0)

                if (has_uniform_batch_size(mode) and
                        num_mon_batches is None):
                    num_mon_batches = int(num_examples /
                                          float(mon_batch_size))
                elif num_mon_batches is None:
                    num_mon_batches = int(np.ceil(float(num_examples) /
                                                  float(mon_batch_size)))

                batches = [None] * int(num_mon_batches)
                visited = [False] * int(num_mon_batches)

                batch_idx = shared(0)

                class RecorderAndValidator(object):

                    def __init__(self):
                        self.validate = False

                    def __call__(self, *data):
                        """
                        Initially, records the batches the monitor shows
                        it. When set to validate mode, makes sure the
                        batches shown on the second monitor call match
                        those from the first.
                        """
                        X, = data

                        idx = batch_idx.get_value()
                        batch_idx.set_value(idx + 1)

                        # Note: if the monitor starts supporting variable
                        # batch sizes, take this out. Maybe move it to a
                        # new test that checks that the iterator's
                        # `uneven` property is set accurately.
                        warnings.warn("TODO: add unit test that "
                                      "iterators' uneven property is set "
                                      "correctly.")
                        # assert X.shape[0] == mon_batch_size

                        if self.validate:
                            previous_batch = batches[idx]
                            assert not visited[idx]
                            visited[idx] = True
                            if not np.allclose(previous_batch, X):
                                print('Visited different data in batch',
                                      idx)
                                print(previous_batch)
                                print(X)
                                print('Iteration mode', mode)
                                assert False
                        else:
                            batches[idx] = X
                        # end if
                    # end __call__
                # end class

                prereq = RecorderAndValidator()

                monitor.add_channel(
                    name='dummy',
                    ipt=model.input_space.make_theano_batch(),
                    val=0.,
                    prereqs=[prereq],
                    data_specs=(model.get_input_space(),
                                model.get_input_source()))

                try:
                    monitor()
                except RuntimeError:
                    print('monitor raised RuntimeError for iteration '
                          'mode', mode)
                    raise

                assert None not in batches

                batch_idx.set_value(0)
                prereq.validate = True

                monitor()

                assert all(visited)
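# The expected-batch-count arithmetic used in the test above, factored out
# for reference (this helper is ours, not part of the test suite): modes
# with a uniform batch size drop the final ragged batch, while other modes
# round up.
def _expected_num_batches(num_examples, batch_size, uniform):
    if uniform:
        return int(num_examples / float(batch_size))
    return int(np.ceil(float(num_examples) / float(batch_size)))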
def compute_likelihood_given_logz(nsamples, psamples, batch_size,
                                  energy_fn, inference_fn, log_z, test_x):
    """
    Compute the test set likelihood as below, where q is the variational
    approximation to the posterior p(h1,h2|v):

        ln p(v) \approx -\sum_h q(h) E(v,h1,h2) + H(q) - ln Z

    See section 3.2 of the DBM paper for details.

    Parameters
    ----------
    nsamples : array-like object of theano shared variables
        Negative samples
    psamples : array-like object of theano shared variables
        Positive samples
    batch_size : scalar
        Size of a batch of samples
    energy_fn : theano.function
        Function which computes the (temperature 1) energy of the
        samples. This function should return a symbolic vector.
    inference_fn : theano.function
        Inference function for DBM. The function takes a T.matrix as
        input (data) and returns a list of length len(b_list), where
        the i-th element is an ndarray containing approximate samples
        of layer i.
    log_z : scalar
        Estimate of the log partition function of 'model'.
    test_x : numpy.ndarray
        Test set data, in dense design matrix format.

    Returns
    -------
    likelihood : scalar
        Average variational lower bound on the log-likelihood of the
        test data under the model.
    """
    likelihood = 0
    for i in xrange(0, len(test_x), batch_size):

        # Recast data as floatX and apply preprocessing if required
        x = numpy.array(test_x[i:numpy.minimum(test_x.shape[0],
                                               i + batch_size), :],
                        dtype=floatX)
        batch_size0 = len(x)
        if len(x) < batch_size:
            # pad x with dummy entries to form a full batch
            x = numpy.concatenate(
                (x, numpy.zeros((batch_size - len(x), x.shape[1]),
                                dtype=floatX)),
                axis=0)

        # Perform inference
        inference_fn(x)

        # The entropy H(q) adds its contribution to the variational
        # lower bound
        hq = 0
        for psample in psamples[1:]:
            temp = \
                - psample.get_value() * numpy.log(1e-5 +
                                                  psample.get_value()) \
                - (1. - psample.get_value()) \
                * numpy.log(1. - psample.get_value() + 1e-5)
            hq += numpy.sum(temp, axis=1)

        # Copy into negative phase buffers to measure energy
        nsamples[0].set_value(x)
        for ii, psample in enumerate(psamples):
            if ii > 0:
                nsamples[ii].set_value(psample.get_value())

        # Compute the sum of the likelihood for the current buffer
        x_likelihood = numpy.sum((-energy_fn(1.0) + hq -
                                  log_z)[:batch_size0])

        # Running per-example average of the log-likelihood. Divide by
        # the number of real examples seen so far, not the buffer size,
        # since the last buffer might be smaller.
        likelihood = (i * likelihood + x_likelihood) / (i + batch_size0)

    return likelihood
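# The H(q) term above is the entropy of factorized Bernoulli units. A
# plain-numpy version for a (batch, units) matrix of means q, using the
# same 1e-5 clamp as the loop above (the helper name is ours):
def _bernoulli_entropy(q, eps=1e-5):
    q = numpy.asarray(q, dtype='float64')
    return numpy.sum(-q * numpy.log(q + eps) -
                     (1. - q) * numpy.log(1. - q + eps), axis=1)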
def train_batch(self, dataset, batch_size):
    """
    .. todo::

        WRITEME
    """
    # TODO: this results in compilation happening every time learn is
    # called. We should cache the compilation results, including those
    # inside cg.
    X = dataset.get_design_matrix()
    m = X.shape[0]
    assert X.shape[1] == self.nvis

    gamma = N.zeros((batch_size, self.nhid))

    cur_gamma = T.vector(name='cur_gamma')
    cur_v = T.vector(name='cur_v')
    recons = T.dot(cur_gamma, self.W)
    recons.name = 'recons'

    recons_diffs = cur_v - recons
    recons_diffs.name = 'recons_diffs'

    recons_diff_sq = T.sqr(recons_diffs)
    recons_diff_sq.name = 'recons_diff_sq'

    recons_error = T.sum(recons_diff_sq)
    recons_error.name = 'recons_error'

    dict_dists = T.sum(T.sqr(self.W - cur_v), axis=1)
    dict_dists.name = 'dict_dists'

    abs_gamma = abs(cur_gamma)
    abs_gamma.name = 'abs_gamma'

    weighted_dists = T.dot(abs_gamma, dict_dists)
    weighted_dists.name = 'weighted_dists'

    penalty = self.coeff * weighted_dists
    penalty.name = 'penalty'

    # prevent directions of absolute flatness in the hessian
    # W_sq = T.sqr(self.W)
    # W_sq.name = 'W_sq'
    # debug = T.sum(W_sq)
    debug = 1e-10 * T.sum(dict_dists)
    debug.name = 'debug'

    # J = debug
    J = recons_error + penalty + debug
    J.name = 'J'

    Jf = function([cur_v, cur_gamma], J)

    start = self.rng.randint(m - batch_size + 1)
    batch_X = X[start:start + batch_size, :]

    # TODO: optimize gamma
    logger.info('optimizing gamma')
    for i in xrange(batch_size):
        gamma[i, :] = self.optimize_gamma(batch_X[i, :])

    logger.info('max min')
    logger.info(N.abs(gamma).min(axis=0).max())
    logger.info('min max')
    logger.info(N.abs(gamma).max(axis=0).max())

    # Optimize W
    logger.info('optimizing W')
    logger.warning("not tested since switching to Razvan's all-theano "
                   "implementation of linear cg")
    cg.linear_cg(J, [self.W], max_iters=3)

    err = 0.
    for i in xrange(batch_size):
        err += Jf(batch_X[i, :], gamma[i, :])
    assert not N.isnan(err)
    assert not N.isinf(err)
    logger.info('err: {0}'.format(err))
    return True
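# The objective J built symbolically above, written out in plain numpy
# for a single example; handy as a sanity check against Jf. The tiny
# 'debug' curvature term is omitted, and the helper name is ours. Here v
# has shape (nvis,), gamma has shape (nhid,), and W has shape
# (nhid, nvis).
def _coding_cost_np(v, gamma, W, coeff):
    recons_error = N.sum(N.square(v - N.dot(gamma, W)))
    dict_dists = N.sum(N.square(W - v), axis=1)
    penalty = coeff * N.dot(N.abs(gamma), dict_dists)
    return recons_error + penalty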