def set_topological_view(self, V, axes=('b', 0, 1, 'c')):
    """
    Sets the dataset to represent V, where V is a batch
    of topological views of examples.

    .. todo::

        Why is this parameter named 'V'?

    Parameters
    ----------
    V : ndarray
        An array containing a topological view of training examples.
    axes : tuple, optional
        The axes ordering of the provided topological view. Must be some
        permutation of ('b', 0, 1, 'c'), where 'b' indexes examples,
        0 and 1 index rows and columns, and 'c' indexes color channels.
    """
    assert not contains_nan(V)
    rows = V.shape[axes.index(0)]
    cols = V.shape[axes.index(1)]
    channels = V.shape[axes.index('c')]
    self.view_converter = DefaultViewConverter([rows, cols, channels],
                                               axes=axes)
    self.X = self.view_converter.topo_view_to_design_mat(V)
    # self.X_topo_space stores a "default" topological space that
    # will be used only when self.iterator is called without a
    # data_specs, and with "topo=True", which is deprecated.
    self.X_topo_space = self.view_converter.topo_space
    assert not contains_nan(self.X)

    # Update data specs
    X_space = VectorSpace(dim=self.X.shape[1])
    X_source = 'features'
    if self.y is None:
        space = X_space
        source = X_source
    else:
        if self.y.ndim == 1:
            dim = 1
        else:
            dim = self.y.shape[-1]

        # This is to support old pickled models
        if getattr(self, 'y_labels', None) is not None:
            y_space = IndexSpace(dim=dim, max_labels=self.y_labels)
        elif getattr(self, 'max_labels', None) is not None:
            y_space = IndexSpace(dim=dim, max_labels=self.max_labels)
        else:
            y_space = VectorSpace(dim=dim)
        y_source = 'targets'

        latent_space = VectorSpace(dim=self.latent.shape[-1])
        latent_source = 'latents'

        space = CompositeSpace((X_space, y_space, latent_space))
        source = (X_source, y_source, latent_source)

    self.data_specs = (space, source)
    self.X_space = X_space
    self._iter_data_specs = (X_space, X_source)
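# A minimal NumPy sketch of what topo_view_to_design_mat amounts to for
# the default ('b', 0, 1, 'c') ordering: each example's
# (rows, cols, channels) block is flattened into one row of the design
# matrix. The array shape below is a hypothetical example, not taken
# from the code above.
import numpy as np

V = np.zeros((10, 28, 28, 1), dtype='float32')  # ('b', 0, 1, 'c')
X = V.reshape(V.shape[0], -1)                   # one flattened row per example
assert X.shape == (10, 28 * 28 * 1)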
def test_contains_nan(): """ Tests that pylearn2.utils.contains_nan correctly identifies `np.nan` values in an array. """ arr = np.random.random(100) assert not contains_nan(arr) arr[0] = np.nan assert contains_nan(arr)
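# A companion test sketch for the Inf counterpart, assuming
# pylearn2.utils.contains_inf follows the same calling convention as
# contains_nan (it is used that way elsewhere in this code):
def test_contains_inf():
    """
    Tests that contains_inf correctly identifies `np.inf` values in
    an array.
    """
    arr = np.random.random(100)
    assert not contains_inf(arr)
    arr[0] = np.inf
    assert contains_inf(arr)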
def set_mri_topological_view(self, topo_view, mask=None,
                             axes=("b", 0, 1, "c")):
    """
    Set the topological view.

    Parameters
    ----------
    topo_view : array-like
        Topological view of a matrix, 4D. Should be MRI 4D data.
    mask : array-like, optional
        Mask for data.
    axes : tuple, optional
        Axes ordering of the provided topological view.

    Returns
    -------
    design_matrix : array-like
        The corresponding design matrix for the topological view.
    """
    assert not contains_nan(topo_view)
    r, c, d = tuple(topo_view.shape[axes.index(i)] for i in (0, 1, "c"))
    self.view_converter = MRIViewConverterTransposed(
        (r, c, d), mask=mask, axes=axes)
    design_matrix = self.view_converter.topo_view_to_design_mat(topo_view)

    return design_matrix
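# How the axes.index lookup above resolves dimensions; the shape is a
# hypothetical stand-in for a batch of MRI volumes:
axes = ("b", 0, 1, "c")
shape = (128, 64, 64, 3)  # hypothetical ('b', 0, 1, 'c') batch
r, c, d = tuple(shape[axes.index(i)] for i in (0, 1, "c"))
assert (r, c, d) == (64, 64, 3)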
def main_loop(self):
    """
    Repeatedly calls the training algorithm, verifying after every step
    that no agent parameter has become NaN or Inf, and checkpointing the
    agent every 1000 steps.
    """
    self.algorithm.setup(agent=self.agent, environment=self.environment)
    i = 0
    for param in self.agent.get_params():
        assert not contains_nan(param.get_value()), (i, param.name)
        assert not contains_inf(param.get_value()), (i, param.name)
    while True:
        rval = self.algorithm.train()
        assert rval is None
        i += 1
        # Sanity-check every parameter after each training step
        for param in self.agent.get_params():
            assert not contains_nan(param.get_value()), (i, param.name)
            assert not contains_inf(param.get_value()), (i, param.name)
        if i % 1000 == 0:
            serial.save(self.save_path, self.agent)
            logger.info('saved!')
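# The per-parameter sanity check above can be factored into a small
# helper; a sketch using the same contains_nan / contains_inf utilities:
def check_finite_params(params, step):
    """Assert that no parameter contains NaN or Inf values."""
    for param in params:
        value = param.get_value()
        assert not contains_nan(value), (step, param.name)
        assert not contains_inf(value), (step, param.name)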
def next(self): """ Get the next subset of the dataset during dataset iteration. Converts index selections for batches to boolean selections that are supported by HDF5 datasets. """ next_index = self._subset_iterator.next() # convert to boolean selection sel = np.zeros(self.num_examples, dtype=bool) sel[next_index] = True next_index = sel rval = [] for data, fn in safe_izip(self._raw_data, self._convert): try: this_data = data[next_index] except TypeError: this_data = data[next_index, :] if fn: this_data = fn(this_data) assert not contains_nan(this_data) rval.append(this_data) rval = tuple(rval) if not self._return_tuple and len(rval) == 1: rval, = rval return rval
def __init__(self, which_set, label_type=None, azimuth=False,
             rotation=False, texture=False, center=False,
             contrast_normalize=False, seed=132987):
    assert which_set in ['train', 'valid', 'test']
    assert label_type in [None, 'label', 'azimuth',
                          'rotation', 'texture_id']

    # load data
    fname = '${PYLEARN2_DATA_PATH}/mnistplus/mnistplus'
    if azimuth:
        fname += '_azi'
    if rotation:
        fname += '_rot'
    if texture:
        fname += '_tex'

    data = load(fname + '.pkl')

    # get images and cast to floatX
    data_x = np.cast[config.floatX](data['data'])
    data_x = data_x[MNISTPlus.idx[which_set]]

    if contrast_normalize:
        meanx = np.mean(data_x, axis=1)[:, None]
        stdx = np.std(data_x, axis=1)[:, None]
        data_x = (data_x - meanx) / stdx

    if center:
        data_x -= np.mean(data_x, axis=0)

    # get labels
    data_y = None
    if label_type is not None:
        data_y = data[label_type]

        # convert to float for performing regression
        if label_type in ['azimuth', 'rotation']:
            data_y = np.cast[config.floatX](data_y / 360.)

        # retrieve only subset of data
        data_y = data_y[MNISTPlus.idx[which_set]]

    # create view converter for retrieving topological view
    view_converter = dense_design_matrix.DefaultViewConverter((48, 48, 1))

    # init the super class; y_labels only makes sense for integer
    # labels, not for the float regression targets of 'azimuth' and
    # 'rotation', and not when there are no labels at all
    if data_y is not None and label_type not in ['azimuth', 'rotation']:
        super(MNISTPlus, self).__init__(
            X=data_x, y=data_y,
            y_labels=np.max(data_y) + 1,
            view_converter=view_converter)
    else:
        super(MNISTPlus, self).__init__(
            X=data_x, y=data_y,
            view_converter=view_converter)

    assert not contains_nan(self.X)
def __init__(self, which_set, numOfClasses, numOfExamplesPerClass, axes=('b', 0, 1, 'c')): self.height = 32 self.width = 100 self.axes = axes self.dtype = 'uint8' self.examples = [] self.img_shape = (1, self.height, self.width) self.img_size = numpy.prod(self.img_shape) self.numOfClasses = numOfClasses self.numOfExamplesPerClass = numOfExamplesPerClass self.classes = [] self.examplesPerClassCount = {} if which_set == "train": self.fileToLoadFrom = "annotation_train.txt" elif which_set == "test": self.fileToLoadFrom = "annotation_test.txt" elif which_set == "valid": self.fileToLoadFrom = "annotation_val.txt" else: raise ValueError("Set not recognized") self.datapath = "/media/tommaso/Lacie/mnt/ramdisk/max/90kDICT32px/" self.loadData() X = numpy.cast['float32'](self.x) view_converter = dense_design_matrix.DefaultViewConverter((self.height, self.width, 1), axes) super(MJSYNTH, self).__init__(X=X, y=self.y, view_converter=view_converter, y_labels=numOfClasses) assert not contains_nan(self.X)
def __init__(self, which_set, which_experiment, start=None, stop=None, axes=('b', 0, 1, 'c'), preprocessor = None): # note: there is no such thing as the cifar10 validation set; # pylearn1 defined one but really it should be user-configurable # (as it is here) assert which_set in ['train', 'test'] assert which_experiment in ['S100', 'ADD3_10_S100', 'ADD3_10_S250', 'ADD3_ALL_S100', 'RM3_S100', 'RP3_S100'] self.experiment = which_experiment data_dir = string_utils.preprocess('${PYLEARN2_DATA_PATH}') experiment_folder_string = "experiment_"+string.lower(which_experiment) path = os.path.join(data_dir,"cifar10",experiment_folder_string,which_set+".pkl") meta_path = os.path.join(data_dir,"cifar10",experiment_folder_string,"meta") self.axes = axes # we also expose the following details: self.img_shape = (3, 32, 32) self.img_size = numpy.prod(self.img_shape) meta = serial.load(meta_path) #self.label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] self.label_names = meta['label_names'] self.n_classes = len(self.label_names) obj = serial.load(path) X = obj['data'] if(which_set == 'train'): ntrain = X.shape[0] if(which_set == 'test'): ntest = X.shape[0] assert X.max() == 255. assert X.min() == 0. X = numpy.cast['float32'](X) y = numpy.asarray(obj['labels']).astype('uint8') if which_set == 'test': y = y.reshape((y.shape[0], 1)) if start is not None: # This needs to come after the prepro so that it doesn't # change the pixel means computed above for toronto_prepro assert start >= 0 assert stop > start assert stop <= X.shape[0] X = X[start:stop, :] y = y[start:stop, :] assert X.shape[0] == y.shape[0] view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3), axes) super(Experiment, self).__init__(X=X, y=y, y_labels=self.n_classes, view_converter=view_converter, axes=self.axes) assert not contains_nan(self.X)
def __init__(self, which_set, label_type=None, center=False, contrast_normalize=False, seed=132987): assert which_set in ['train', 'valid', 'test'] assert label_type in [ None, 'label', 'azimuth', 'rotation', 'texture_id'] # load data fname = '${PYLEARN2_DATA_PATH}/mnistplus/mnistplus' if label_type == 'azimuth': fname += '_azi' if label_type == 'rotation': fname += '_rot' label_type = 'label' if label_type == 'texture_id': fname += '_tex' label_type = 'label' data = load(fname + '.pkl') # get images and cast to floatX data_x = np.cast[config.floatX](data['data']) data_x = data_x[MNISTPlus.idx[which_set]] if contrast_normalize: meanx = np.mean(data_x, axis=1)[:, None] stdx = np.std(data_x, axis=1)[:, None] data_x = (data_x - meanx) / stdx if center: data_x -= np.mean(data_x, axis=0) # get labels data_y = None if label_type is not None: data_y = data[label_type].reshape(-1, 1) # convert to float for performing regression if label_type == 'azimuth': data_y = np.cast[config.floatX](data_y / 360.) # retrieve only subset of data data_y = data_y[MNISTPlus.idx[which_set]] view_converter = dense_design_matrix.DefaultViewConverter((48, 48, 1)) # init the super class if data_y is not None: super(MNISTPlus, self).__init__( X=data_x, y=data_y, y_labels=np.max(data_y) + 1, view_converter=view_converter ) else: super(MNISTPlus, self).__init__( X=data_x, view_converter=view_converter ) assert not contains_nan(self.X)
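# A standalone NumPy sketch of the per-example contrast normalization
# used above; the epsilon guard against zero-variance rows is an
# assumption, the original divides by the raw standard deviation:
import numpy as np

def contrast_normalize(X, eps=1e-8):
    mean = X.mean(axis=1, keepdims=True)
    std = X.std(axis=1, keepdims=True)
    return (X - mean) / (std + eps)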
def __init__(self, which_set, one_hot=False, axes=['b', 0, 1, 'c']):
    """
    Loads the OCR letters dataset and splits it into train/valid/test
    according to ``self.data_split``.

    Parameters
    ----------
    which_set : str
        One of 'train', 'valid' or 'test'.
    one_hot : bool, optional
        If True, convert the targets to a one-hot encoding.
    axes : list, optional
        Axes ordering for the topological view.
    """
    self.args = locals()

    assert which_set in self.data_split.keys()

    path = serial.preprocess(
        "${PYLEARN2_DATA_PATH}/ocr_letters/letter.data")
    with open(path, 'r') as data_f:
        data = data_f.readlines()
        data = [line.split("\t") for line in data]

    data_x = [map(int, item[6:-1]) for item in data]
    data_letters = [item[1] for item in data]
    data_fold = [int(item[5]) for item in data]

    letters = list(numpy.unique(data_letters))
    data_y = [letters.index(item) for item in data_letters]

    if which_set == 'train':
        split = slice(0, self.data_split['train'])
    elif which_set == 'valid':
        split = slice(self.data_split['train'],
                      self.data_split['train'] + self.data_split['valid'])
    elif which_set == 'test':
        split = slice(self.data_split['train'] + self.data_split['valid'],
                      (self.data_split['train'] +
                       self.data_split['valid'] +
                       self.data_split['test']))

    data_x = numpy.asarray(data_x[split])
    data_y = numpy.asarray(data_y[split])
    # was `data_y[split]`, which silently stored labels as folds
    data_fold = numpy.asarray(data_fold[split])
    assert data_x.shape[0] == data_y.shape[0]
    assert data_x.shape[0] == self.data_split[which_set]

    self.one_hot = one_hot
    if one_hot:
        one_hot = numpy.zeros(
            (data_y.shape[0], len(letters)), dtype='float32')
        for i in xrange(data_y.shape[0]):
            one_hot[i, data_y[i]] = 1.
        data_y = one_hot

    view_converter = dense_design_matrix.DefaultViewConverter(
        (16, 8, 1), axes)
    super(OCR, self).__init__(
        X=data_x, y=data_y, view_converter=view_converter)

    assert not contains_nan(self.X)
    self.fold = data_fold
def set_data(self, data, data_specs):
    """
    Validate `data` against `data_specs` and store it. Not yet
    implemented.

    Parameters
    ----------
    data : tuple of ndarrays
        Data values, organized according to `data_specs`.
    data_specs : (space, source) pair
        Specs describing `data`.
    """
    # data is organized as data_specs
    # keep self.data_specs, and convert data
    data_specs[0].np_validate(data)
    # `not [ ... ]` was always False for non-empty data; check each
    # element explicitly instead
    assert not any(contains_nan(X) for X in data)
    raise NotImplementedError()
def entropy_binary_vector(P): """ .. todo:: WRITEME properly If P[i,j] represents the probability of some binary random variable X[i,j] being 1, then rval[i] gives the entropy of the random vector X[i,:] """ for Pv in get_debug_values(P): assert Pv.min() >= 0.0 assert Pv.max() <= 1.0 oneMinusP = 1. - P PlogP = xlogx(P) omPlogOmP = xlogx(oneMinusP) term1 = -T.sum(PlogP, axis=1) assert len(term1.type.broadcastable) == 1 term2 = -T.sum(omPlogOmP, axis=1) assert len(term2.type.broadcastable) == 1 rval = term1 + term2 debug_vals = get_debug_values(PlogP, omPlogOmP, term1, term2, rval) for plp, olo, t1, t2, rv in debug_vals: debug_assert(isfinite(plp)) debug_assert(isfinite(olo)) debug_assert(not contains_nan(t1)) debug_assert(not contains_nan(t2)) debug_assert(not contains_nan(rv)) return rval
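# A NumPy cross-check of the formula above: with P[i, j] = 0.5
# everywhere, each of the d coordinates contributes ln(2) of entropy.
# The clipping guard is an assumption standing in for xlogx's handling
# of P = 0:
import numpy as np

def entropy_binary_vector_np(P, eps=1e-12):
    P = np.clip(P, eps, 1. - eps)
    return -(P * np.log(P) + (1. - P) * np.log(1. - P)).sum(axis=1)

P = np.full((3, 4), 0.5)
assert np.allclose(entropy_binary_vector_np(P), 4 * np.log(2.))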
def do_check_on(var, nd, f, is_input): """ Checks `var` for NaNs / Infs. If detected, raises an exception and / or prints information about `nd`, `f`, and `is_input` to help the user determine the cause of the invalid values. Parameters ---------- var : numpy.ndarray The value to be checked. nd : theano.gof.Apply The Apply node being executed f : callable The thunk for the apply node is_input : bool If True, `var` is an input to `nd`. If False, it is an output. """ error = False if nan_is_error: if contains_nan(var): logger.error('NaN detected') error = True if inf_is_error: if contains_inf(var): logger.error('Inf detected') error = True if big_is_error: if np.abs(var).max() > 1e10: logger.error('Big value detected') error = True if error: if is_input: logger.error('In an input') else: logger.error('In an output') logger.error('Inputs: ') for ivar, ival in zip(nd.inputs, f.inputs): logger.error('var') logger.error(ivar) logger.error(theano.printing.min_informative_str(ivar)) logger.error('val') logger.error(ival) logger.error('Node:') logger.error(nd) assert False
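# Minimal NumPy sketches of the checks used above; pylearn2's actual
# contains_nan / contains_inf may be implemented differently (e.g. with
# reductions that short-circuit), but the semantics are these:
import numpy as np

def contains_nan_np(arr):
    return np.isnan(arr).any()

def contains_inf_np(arr):
    return np.isinf(arr).any()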
def next(self):
    """
    Get the next subset of the dataset during dataset iteration.

    Converts index selections for batches to boolean selections that
    are supported by HDF5 datasets.
    """
    next_index = self._subset_iterator.next()

    # convert to boolean selection
    sel = np.zeros(self.num_examples, dtype=bool)
    sel[next_index] = True
    next_index = sel

    rval = []
    for data, fn in safe_izip(self._raw_data, self._convert):
        try:
            this_data = data[next_index]
        except TypeError:
            # This fallback is triggered when the shape of next_index is
            # incompatible with the shape of the dataset. See
            # test_hdf5_topo_view() for an example, where
            # next_index.shape = (10,) and data is
            # <HDF5 dataset "y": shape (10, 3), type "<f8">.
            # It might be cleaner to check explicitly whether
            # next_index.ndim == data.ndim instead of catching TypeError.
            if data.ndim > 1:
                this_data = data[next_index, :]
            else:
                raise
        # Check if the dataset data is a vector and transform it into a
        # one-column matrix. This is needed to automatically convert the
        # shape of the data later (in the format_as method of the
        # Space.)
        if fn:
            this_data = fn(this_data)
        assert not contains_nan(this_data)
        rval.append(this_data)
    rval = tuple(rval)
    if not self._return_tuple and len(rval) == 1:
        rval, = rval
    return rval
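# Why the boolean conversion above matters: h5py datasets reject
# unsorted integer fancy indices, but accept a boolean mask along the
# first axis. A hypothetical sketch (file name and contents invented):
import h5py
import numpy as np

with h5py.File('scratch.h5', 'w') as f:
    dset = f.create_dataset('X', data=np.arange(20.).reshape(10, 2))
    mask = np.zeros(10, dtype=bool)
    mask[[3, 1, 2]] = True      # assignment order is irrelevant
    batch = dset[mask, :]       # rows 1, 2, 3, in index order
    assert batch.shape == (3, 2)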
def __init__(self, which_set, center=False, example_range=None): """ .. todo:: WRITEME """ if which_set == 'train': train = load('${PYLEARN2_DATA_PATH}/stl10/stl10_matlab/train.mat') # Load the class names self.class_names = [array[0].encode('utf-8') for array in train['class_names'][0]] # Load the fold indices fold_indices = train['fold_indices'] assert fold_indices.shape == (1, 10) self.fold_indices = np.zeros((10, 1000), dtype='uint16') for i in xrange(10): indices = fold_indices[0, i] assert indices.shape == (1000, 1) assert indices.dtype == 'uint16' self.fold_indices[i, :] = indices[:, 0] # The data is stored as uint8 # If we leave it as uint8, it will cause the CAE to silently fail # since theano will treat derivatives wrt X as 0 X = np.cast['float32'](train['X']) assert X.shape == (5000, 96 * 96 * 3) if example_range is not None: X = X[example_range[0]:example_range[1], :] # this is uint8 y = train['y'][:, 0] assert y.shape == (5000,) elif which_set == 'test': test = load('${PYLEARN2_DATA_PATH}/stl10/stl10_matlab/test.mat') # Load the class names self.class_names = [array[0].encode('utf-8') for array in test['class_names'][0]] # The data is stored as uint8 # If we leave it as uint8, it will cause the CAE to silently fail # since theano will treat derivatives wrt X as 0 X = np.cast['float32'](test['X']) assert X.shape == (8000, 96 * 96 * 3) if example_range is not None: X = X[example_range[0]:example_range[1], :] # this is uint8 y = test['y'][:, 0] assert y.shape == (8000,) elif which_set == 'unlabeled': unlabeled = load('${PYLEARN2_DATA_PATH}/stl10/stl10_matlab/' 'unlabeled.mat') X = unlabeled['X'] # this file is stored in HDF format, which transposes everything assert X.shape == (96 * 96 * 3, 100000) assert X.dtype == 'uint8' if example_range is None: X = X.value else: X = X.value[:, example_range[0]:example_range[1]] X = np.cast['float32'](X.T) unlabeled.close() y = None else: raise ValueError('"' + which_set + '" is not an STL10 dataset. ' 'Recognized values are "train", "test", and ' '"unlabeled".') if center: X -= 127.5 view_converter = dense_design_matrix.DefaultViewConverter((96, 96, 3)) super(STL10, self).__init__(X=X, y=y, y_labels=10, view_converter=view_converter) for i in xrange(self.X.shape[0]): mat = X[i:i + 1, :] topo = self.get_topological_view(mat) for j in xrange(topo.shape[3]): temp = topo[0, :, :, j].T.copy() topo[0, :, :, j] = temp mat = self.get_design_matrix(topo) X[i:i + 1, :] = mat assert not contains_nan(self.X)
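# The per-example loop at the end of __init__ above swaps the
# row/column axes of every channel; a vectorized NumPy sketch of the
# same operation on a hypothetical ('b', 0, 1, 'c') batch:
import numpy as np

topo = np.random.rand(5, 96, 96, 3).astype('float32')
flipped = topo.transpose(0, 2, 1, 3)  # transpose rows/cols per example and channel
assert flipped.shape == topo.shape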
def __init__(self, which_set, center=False, rescale=False, gcn=None, start=None, stop=None, axes=('b', 0, 1, 'c'), toronto_prepro = False, preprocessor = None): # note: there is no such thing as the cifar10 validation set; # pylearn1 defined one but really it should be user-configurable # (as it is here) self.axes = axes # we define here: dtype = 'uint8' ntrain = 50000 nvalid = 0 # artefact, we won't use it ntest = 10000 # we also expose the following details: self.img_shape = (3, 32, 32) self.img_size = N.prod(self.img_shape) self.n_classes = 10 self.label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] # prepare loading fnames = ['data_batch_%i' % i for i in range(1, 6)] lenx = N.ceil((ntrain + nvalid) / 10000.) * 10000 x = N.zeros((lenx, self.img_size), dtype=dtype) y = N.zeros((lenx, 1), dtype=dtype) # load train data nloaded = 0 for i, fname in enumerate(fnames): data = CIFAR10._unpickle(fname) x[i * 10000:(i + 1) * 10000, :] = data['data'] y[i * 10000:(i + 1) * 10000, 0] = data['labels'] nloaded += 10000 if nloaded >= ntrain + nvalid + ntest: break # load test data data = CIFAR10._unpickle('test_batch') # process this data Xs = {'train': x[0:ntrain], 'test': data['data'][0:ntest]} Ys = {'train': y[0:ntrain], 'test': data['labels'][0:ntest]} X = N.cast['float32'](Xs[which_set]) y = Ys[which_set] if isinstance(y, list): y = np.asarray(y).astype(dtype) if which_set == 'test': assert y.shape[0] == 10000 y = y.reshape((y.shape[0], 1)) if center: X -= 127.5 self.center = center if rescale: X /= 127.5 self.rescale = rescale if toronto_prepro: assert not center assert not gcn X = X / 255. if which_set == 'test': other = CIFAR10(which_set='train') oX = other.X oX /= 255. X = X - oX.mean(axis=0) else: X = X - X.mean(axis=0) self.toronto_prepro = toronto_prepro self.gcn = gcn if gcn is not None: gcn = float(gcn) X = global_contrast_normalize(X, scale=gcn) if start is not None: # This needs to come after the prepro so that it doesn't # change the pixel means computed above for toronto_prepro assert start >= 0 assert stop > start assert stop <= X.shape[0] X = X[start:stop, :] y = y[start:stop, :] assert X.shape[0] == y.shape[0] if which_set == 'test': assert X.shape[0] == 10000 view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3), axes) super(CIFAR10, self).__init__(X=X, y=y, view_converter=view_converter, y_labels=self.n_classes) assert not contains_nan(self.X) if preprocessor: preprocessor.apply(self)
def setup(self, model, dataset):
    """
    Compiles the theano functions needed for the train method.

    Parameters
    ----------
    model : a Model instance
    dataset : Dataset
    """
    if self.cost is None:
        self.cost = model.get_default_cost()

    inf_params = [param for param in model.get_params()
                  if contains_inf(param.get_value())]
    if len(inf_params) > 0:
        raise ValueError("These params are Inf: " + str(inf_params))
    if any([contains_nan(param.get_value())
            for param in model.get_params()]):
        nan_params = [param for param in model.get_params()
                      if contains_nan(param.get_value())]
        raise ValueError("These params are NaN: " + str(nan_params))
    self.model = model

    self._synchronize_batch_size(model)
    model._test_batch_size = self.batch_size
    self.monitor = Monitor.get_monitor(model)
    self.monitor._sanity_check()

    # test if force batch size and batch size
    has_force_batch_size = getattr(model, "force_batch_size", False)
    train_dataset_is_uneven = \
        dataset.get_num_examples() % self.batch_size != 0

    # `self.monitoring_dataset.values() > 0` compared a list against an
    # int; check the number of monitoring datasets instead
    has_monitoring_datasets = \
        self.monitoring_dataset is not None and \
        len(self.monitoring_dataset) > 0

    if has_monitoring_datasets:
        monitoring_datasets_are_uneven = any(
            d.get_num_examples() % self.batch_size != 0
            for d in self.monitoring_dataset.values()
        )
    else:
        monitoring_datasets_are_uneven = False  # or True it doesn't matter

    if has_force_batch_size and train_dataset_is_uneven and \
       not has_uniform_batch_size(self.train_iteration_mode):
        raise ValueError(
            "Dataset size is not a multiple of batch size. "
            "You should set train_iteration_mode (and "
            "maybe monitor_iteration_mode) to "
            "even_sequential, even_shuffled_sequential or "
            "even_batchwise_shuffled_sequential"
        )

    if (
        has_force_batch_size and has_monitoring_datasets and
        monitoring_datasets_are_uneven and
        not has_uniform_batch_size(self.monitor_iteration_mode)
    ):
        raise ValueError(
            "Dataset size is not a multiple of batch size. "
            "You should set monitor_iteration_mode to "
            "even_sequential, even_shuffled_sequential or "
            "even_batchwise_shuffled_sequential"
        )

    data_specs = self.cost.get_data_specs(self.model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

    # Build a flat tuple of Theano Variables, one for each space.
    # We want that so that if the same space/source is specified
    # more than once in data_specs, only one Theano Variable
    # is generated for it, and the corresponding value is passed
    # only once to the compiled Theano function.
    theano_args = []
    for space, source in safe_zip(space_tuple, source_tuple):
        name = "%s[%s]" % (self.__class__.__name__, source)
        arg = space.make_theano_batch(name=name,
                                      batch_size=self.batch_size)
        theano_args.append(arg)
    theano_args = tuple(theano_args)

    # Methods of `self.cost` need args to be passed in a format
    # compatible with data_specs
    nested_args = mapping.nest(theano_args)
    fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
    self.on_load_batch = fixed_var_descr.on_load_batch

    cost_value = self.cost.expr(model, nested_args,
                                **fixed_var_descr.fixed_vars)

    if cost_value is not None and cost_value.name is None:
        # Concatenate the name of all tensors in theano_args !?
cost_value.name = "objective" # Set up monitor to model the objective value, learning rate, # momentum (if applicable), and extra channels defined by # the cost learning_rate = self.learning_rate if self.monitoring_dataset is not None: if self.monitoring_batch_size is None and self.monitoring_batches is None: self.monitoring_batch_size = self.batch_size self.monitoring_batches = self.batches_per_iter self.monitor.setup( dataset=self.monitoring_dataset, cost=self.cost, batch_size=self.monitoring_batch_size, num_batches=self.monitoring_batches, extra_costs=self.monitoring_costs, mode=self.monitor_iteration_mode, ) dataset_name = self.monitoring_dataset.keys()[0] monitoring_dataset = self.monitoring_dataset[dataset_name] # TODO: have Monitor support non-data-dependent channels self.monitor.add_channel( name="learning_rate", ipt=None, val=learning_rate, data_specs=(NullSpace(), ""), dataset=monitoring_dataset, ) if self.learning_rule: self.learning_rule.add_channels_to_monitor(self.monitor, monitoring_dataset) params = list(model.get_params()) assert len(params) > 0 for i, param in enumerate(params): if param.name is None: param.name = "sgd_params[%d]" % i grads, updates = self.cost.get_gradients(model, nested_args, **fixed_var_descr.fixed_vars) if not isinstance(grads, OrderedDict): raise TypeError( str(type(self.cost)) + ".get_gradients returned " + "something with" + str(type(grads)) + "as its " + "first member. Expected OrderedDict." ) for param in grads: assert param in params for param in params: assert param in grads for param in grads: if grads[param].name is None and cost_value is not None: grads[param].name = "grad(%(costname)s, %(paramname)s)" % { "costname": cost_value.name, "paramname": param.name, } assert grads[param].dtype == param.dtype lr_scalers = model.get_lr_scalers() for key in lr_scalers: if key not in params: raise ValueError( "Tried to scale the learning rate on " + str(key) + " which is not an optimization parameter." ) log.info("Parameter and initial learning rate summary:") for param in params: param_name = param.name if param_name is None: param_name = "anon_param" lr = learning_rate.get_value() * lr_scalers.get(param, 1.0) log.info("\t" + param_name + ": " + str(lr)) if self.learning_rule: updates.update(self.learning_rule.get_updates(learning_rate, grads, lr_scalers)) else: # Use standard SGD updates with fixed learning rate. updates.update( dict( safe_zip( params, [param - learning_rate * lr_scalers.get(param, 1.0) * grads[param] for param in params] ) ) ) for param in params: if updates[param].name is None: updates[param].name = "sgd_update(" + param.name + ")" model.modify_updates(updates) for param in params: update = updates[param] if update.name is None: update.name = "censor(sgd_update(" + param.name + "))" for update_val in get_debug_values(update): if contains_inf(update_val): raise ValueError("debug value of %s contains infs" % update.name) if contains_nan(update_val): raise ValueError("debug value of %s contains nans" % update.name) with log_timing(log, "Compiling sgd_update"): self.sgd_update = function( theano_args, updates=updates, name="sgd_update", on_unused_input="ignore", mode=self.theano_function_mode, ) self.params = params
def main():
    """
    Entry point: parse command-line options, load the monitors of the
    given models, and interactively plot their channels.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out")
    parser.add_argument("model_paths", nargs='+')
    parser.add_argument("--yrange", help='The y-range to be used for '
                                         'plotting, e.g. 0:1')

    options = parser.parse_args()
    model_paths = options.model_paths

    if options.out is not None:
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    print('generating names...')
    model_names = [model_path.replace('.pkl', '!')
                   for model_path in model_paths]
    model_names = unique_substrings(model_names, min_size=10)
    model_names = [model_name.replace('!', '')
                   for model_name in model_names]
    print('...done')

    channels = {}
    for i, arg in enumerate(model_paths):
        try:
            model = serial.load(arg)
        except Exception:
            if arg.endswith('.yaml'):
                print(arg + " is a yaml config file, "
                      "you need to load a trained model.",
                      file=sys.stderr)
                quit(-1)
            raise
        this_model_channels = model.monitor.channels

        if len(sys.argv) > 2:
            postfix = ":" + model_names[i]
        else:
            postfix = ""

        for channel in this_model_channels:
            channels[channel + postfix] = this_model_channels[channel]
        del model
        gc.collect()

    while True:
        # Make a list of short codes for each channel so user can
        # specify them easily
        tag_generator = _TagGenerator()
        codebook = {}
        sorted_codes = []
        for channel_name in sorted(channels,
                                   key=number_aware_alphabetical_key):
            code = tag_generator.get_tag()
            codebook[code] = channel_name
            codebook['<' + channel_name + '>'] = channel_name
            sorted_codes.append(code)

        x_axis = 'example'
        print('set x_axis to example')

        if len(channels.values()) == 0:
            print("there are no channels to plot")
            break

        # If there is more than one channel in the monitor ask which
        # ones to plot
        prompt = len(channels.values()) > 1

        if prompt:

            # Display the codebook
            for code in sorted_codes:
                print(code + '. ' + codebook[code])

            print()

            print("Put e, b, s or h in the list somewhere to plot "
                  "epochs, batches, seconds, or hours, respectively.")
            response = input('Enter a list of channels to plot '
                             '(example: A, C,F-G, h, <test_err>) or '
                             'q to quit or o for options: ')

            if response == 'o':
                print('1: smooth all channels')
                print('any other response: do nothing, go back to '
                      'plotting')
                response = input('Enter your choice: ')
                if response == '1':
                    for channel in channels.values():
                        k = 5
                        new_val_record = []
                        for i in xrange(len(channel.val_record)):
                            new_val = 0.
                            count = 0.
                            for j in xrange(max(0, i - k), i + 1):
                                new_val += channel.val_record[j]
                                count += 1.
                            new_val_record.append(new_val / count)
                        channel.val_record = new_val_record
                continue

            if response == 'q':
                break

            # Remove spaces
            response = response.replace(' ', '')

            # Split into list
            codes = response.split(',')

            final_codes = set([])

            for code in codes:
                if code == 'e':
                    x_axis = 'epoch'
                    continue
                elif code == 'b':
                    x_axis = 'batche'
                elif code == 's':
                    x_axis = 'second'
                elif code == 'h':
                    x_axis = 'hour'
                elif code.startswith('<'):
                    assert code.endswith('>')
                    final_codes.add(code)
                elif code.find('-') != -1:
                    # The current list element is a range of codes
                    rng = code.split('-')
                    if len(rng) != 2:
                        print("Input not understood: " + code)
                        quit(-1)

                    found = False
                    for i in xrange(len(sorted_codes)):
                        if sorted_codes[i] == rng[0]:
                            found = True
                            break
                    if not found:
                        print("Invalid code: " + rng[0])
                        quit(-1)

                    found = False
                    for j in xrange(i, len(sorted_codes)):
                        if sorted_codes[j] == rng[1]:
                            found = True
                            break
                    if not found:
                        print("Invalid code: " + rng[1])
                        quit(-1)

                    final_codes = final_codes.union(
                        set(sorted_codes[i:j + 1]))
                else:
                    # The current list element is just a single code
                    final_codes = final_codes.union(set([code]))
            # end for code in codes
        else:
            # was `final_codes ,= set(...)`, a one-element tuple
            # unpacking that fails whenever there is more than one
            # channel
            final_codes = set(codebook.keys())

        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        styles = list(colors)
        styles += [color + '--' for color in colors]
        styles += [color + ':' for color in colors]

        fig = plt.figure()
        ax = plt.subplot(1, 1, 1)

        # plot the requested channels
        for idx, code in enumerate(sorted(final_codes)):

            channel_name = codebook[code]
            channel = channels[channel_name]

            y = np.asarray(channel.val_record)

            if contains_nan(y):
                print(channel_name + ' contains NaNs')

            if contains_inf(y):
                print(channel_name + ' contains infinite values')

            if x_axis == 'example':
                x = np.asarray(channel.example_record)
            elif x_axis == 'batche':
                x = np.asarray(channel.batch_record)
            elif x_axis == 'epoch':
                try:
                    x = np.asarray(channel.epoch_record)
                except AttributeError:
                    # older saved monitors won't have epoch_record
                    x = np.arange(len(channel.batch_record))
            elif x_axis == 'second':
                x = np.asarray(channel.time_record)
            elif x_axis == 'hour':
                x = np.asarray(channel.time_record) / 3600.
            else:
                assert False

            ax.plot(x,
                    y,
                    styles[idx % len(styles)],
                    marker='.',  # add point markers to lines
                    label=channel_name)

        plt.xlabel('# ' + x_axis + 's')
        ax.ticklabel_format(scilimits=(-3, 3), axis='both')

        handles, labels = ax.get_legend_handles_labels()
        lgd = ax.legend(handles, labels, loc='upper left',
                        bbox_to_anchor=(1.05, 1.02))

        # Get the axis positions and the height and width of the legend
        plt.draw()
        ax_pos = ax.get_position()
        pad_width = ax_pos.x0 * fig.get_size_inches()[0]
        pad_height = ax_pos.y0 * fig.get_size_inches()[1]
        dpi = fig.get_dpi()
        lgd_width = ax.get_legend().get_frame().get_width() / dpi
        lgd_height = ax.get_legend().get_frame().get_height() / dpi

        # Adjust the bounding box to encompass both legend and axis.
        # Axis should be 3x3 inches. I had trouble getting everything
        # to align vertically.
        ax_width = 3
        ax_height = 3
        total_width = 2 * pad_width + ax_width + lgd_width
        total_height = 2 * pad_height + np.maximum(ax_height, lgd_height)

        fig.set_size_inches(total_width, total_height)
        ax.set_position([pad_width / total_width,
                         1 - 6 * pad_height / total_height,
                         ax_width / total_width,
                         ax_height / total_height])

        if options.yrange is not None:
            ymin, ymax = map(float, options.yrange.split(':'))
            plt.ylim(ymin, ymax)

        if options.out is None:
            plt.show()
        else:
            plt.savefig(options.out)

        if not prompt:
            break
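# The 'smooth all channels' option above computes a trailing running
# mean over a window of k + 1 points; the same logic as a standalone
# helper:
def smooth_val_record(val_record, k=5):
    smoothed = []
    for i in xrange(len(val_record)):
        lo = max(0, i - k)
        smoothed.append(sum(val_record[lo:i + 1]) / float(i + 1 - lo))
    return smoothed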
def set_topological_view(self, V, axes=('b', 0, 1, 2, 'c')):
    """
    Sets the dataset to represent V, where V is a batch
    of topological views of examples.

    .. todo::

        Why is this parameter named 'V'?

    Parameters
    ----------
    V : ndarray
        An array containing a topological view of training examples.
    axes : tuple, optional
        The axes ordering of the provided topo_view. Must be some
        permutation of ('b', 0, 1, 2, 'c') where 'b' indicates the axis
        indexing examples, 0, 1 and 2 indicate the row/column/time
        dimensions and 'c' indicates the axis indexing color channels.
    """
    if len(V.shape) != len(axes):
        raise ValueError("The topological view must have exactly 5 "
                         "dimensions, corresponding to %s" % str(axes))
    assert not contains_nan(V)
    rows = V.shape[axes.index(0)]
    cols = V.shape[axes.index(1)]
    frames = V.shape[axes.index(2)]
    channels = V.shape[axes.index('c')]
    self.view_converter = Default3dViewConverter([rows, cols, frames,
                                                  channels], axes=axes)
    self.X = self.view_converter.topo_view_to_design_mat(V)
    # self.X_topo_space stores a "default" topological space that
    # will be used only when self.iterator is called without a
    # data_specs, and with "topo=True", which is deprecated.
    self.X_topo_space = self.view_converter.topo_space
    assert not contains_nan(self.X)

    # Update data specs
    X_space = VectorSpace(dim=self.X.shape[1])
    X_source = 'features'
    if self.y is None:
        space = X_space
        source = X_source
    else:
        if self.y.ndim == 1:
            dim = 1
        else:
            dim = self.y.shape[-1]

        # This is to support old pickled models
        if getattr(self, 'y_labels', None) is not None:
            y_space = IndexSpace(dim=dim, max_labels=self.y_labels)
        elif getattr(self, 'max_labels', None) is not None:
            y_space = IndexSpace(dim=dim, max_labels=self.max_labels)
        else:
            y_space = VectorSpace(dim=dim)
        y_source = 'targets'

        space = CompositeSpace((X_space, y_space))
        source = (X_source, y_source)

    self.data_specs = (space, source)
    self.X_space = X_space
    self._iter_data_specs = (X_space, X_source)
def __init__(self, which_set, which_experiment, center=False, gcn=None, start=None, stop=None, axes=('b', 0, 1, 'c'), toronto_prepro = False, preprocessor = None): # note: there is no such thing as the cifar10 validation set; # pylearn1 defined one but really it should be user-configurable # (as it is here) assert which_set in ['train', 'test','valid'] assert which_experiment in ['S100', 'ADD3_10_S100', 'ADD3_10_S250', 'ADD3_ALL_S100', 'RM3_S100', 'RP3_S100'] self.experiment = which_experiment index_set = which_set if index_set == 'valid': index_set = 'train' data_dir = string_utils.preprocess('${PYLEARN2_DATA_PATH}') experiment_folder_string = "experiment_"+string.lower(which_experiment) path = os.path.join(data_dir,"cifar10",experiment_folder_string,index_set) meta_path = os.path.join(data_dir,"cifar10",experiment_folder_string,"meta") self.axes = axes # we also expose the following details: self.img_shape = (3, 32, 32) self.img_size = numpy.prod(self.img_shape) meta = serial.load(meta_path) #self.label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] self.label_names = meta['label_names'] self.n_classes = len(self.label_names) obj = serial.load(path) X = obj['data'] assert X.max() == 255. assert X.min() == 0. X = numpy.cast['float32'](X) y = numpy.zeros((X.shape[0], 1), dtype=numpy.uint8) y[0:X.shape[0],0] = numpy.asarray(obj['labels']).astype(numpy.uint8) #y = numpy.asarray(obj['labels']).astype(numpy.uint8) if(which_set == 'train'): ntrain = X.shape[0] if(which_set == 'test'): ntest = X.shape[0] if(which_set == 'valid'): iarray = numpy.random.randint(X.shape[0], size=1000) X = X[iarray] y = y[iarray] assert X.shape[0] == y.shape[0] #y_s = numpy.asarray(obj['labels']).astype(numpy.uint8) #y = numpy.zeros((y_s.shape[0], self.n_classes), dtype=numpy.uint8) #for i in xrange(y_s.shape[0]): # label = y_s[i] # y[i,label]=1.0 if center: X -= 127.5 self.center = center if toronto_prepro: assert not center assert not gcn if which_set == 'test': raise NotImplementedError("Need to subtract the mean of the " "*training* set.") X = X / 255. X = X - X.mean(axis=0) self.toronto_prepro = toronto_prepro self.gcn = gcn if gcn is not None: assert isinstance(gcn, float) X = (X.T - X.mean(axis=1)).T X = (X.T / numpy.sqrt(numpy.square(X).sum(axis=1))).T X *= gcn if start is not None: # This needs to come after the prepro so that it doesn't # change the pixel means computed above for toronto_prepro assert start >= 0 assert stop > start assert stop <= X.shape[0] X = X[start:stop, :] y = y[start:stop, :] view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3), axes) super(Experiment, self).__init__(X=X, y=y, y_labels=self.n_classes, view_converter=view_converter, axes=self.axes) assert not contains_nan(self.X)
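# The inlined global contrast normalization above, rewritten as a
# standalone NumPy sketch using keepdims broadcasting instead of double
# transposes; same math, no bias or epsilon term:
import numpy as np

def gcn(X, scale):
    X = X - X.mean(axis=1, keepdims=True)
    X = X / np.sqrt(np.square(X).sum(axis=1, keepdims=True))
    return X * scale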
def train_all(self, dataset, mu=None):
    """
    Process kmeans algorithm on the input to localize clusters.

    Parameters
    ----------
    dataset : Dataset
        The dataset to cluster; its design matrix is used as the
        training data.
    mu : ndarray, optional
        Initial cluster means, one row per cluster. If not provided,
        k random examples from the dataset are used.

    Returns
    -------
    rval : bool
        True on successful convergence. (If NaNs appear in the means,
        the design matrix X is returned instead; see the TODO below.)
    """

    # TODO-- why does this sometimes return X and sometimes return
    # nothing?

    X = dataset.get_design_matrix()

    n, m = X.shape
    k = self.k

    if milk is not None:
        # use the milk implementation of k-means if it's available
        cluster_ids, mu = milk.kmeans(X, k)
    else:
        # our own implementation

        # taking random inputs as initial clusters if user does not
        # provide them.
        if mu is not None:
            if not len(mu) == k:
                raise Exception("You gave %i clusters"
                                ", but k=%i were expected"
                                % (len(mu), k))
        else:
            indices = numpy.random.randint(X.shape[0], size=k)
            mu = X[indices]

        try:
            dists = numpy.zeros((n, k))
        except MemoryError as e:
            improve_memory_error_message(e, "dying trying to allocate "
                                            "dists matrix for {0} "
                                            "examples and {1} "
                                            "means".format(n, k))

        old_kills = {}

        iter = 0
        mmd = prev_mmd = float('inf')
        while True:
            if self.verbose:
                logger.info('kmeans iter {0}'.format(iter))

            # print 'iter:',iter,' conv crit:',abs(mmd-prev_mmd)
            # if numpy.sum(numpy.isnan(mu)) > 0:
            if contains_nan(mu):
                logger.info('nan found')
                return X

            # computing distances
            for i in xrange(k):
                dists[:, i] = numpy.square((X - mu[i, :])).sum(axis=1)

            if iter > 0:
                prev_mmd = mmd

            min_dists = dists.min(axis=1)

            # mean minimum distance:
            mmd = min_dists.mean()

            logger.info('cost: {0}'.format(mmd))

            if iter > 0 and (iter >= self.max_iter or
                             abs(mmd - prev_mmd) < self.convergence_th):
                # converged
                break

            # finding minimum distances
            min_dist_inds = dists.argmin(axis=1)

            # computing means
            i = 0
            blacklist = []
            new_kills = {}
            while i < k:
                b = min_dist_inds == i
                if not numpy.any(b):
                    killed_on_prev_iter = True
                    # initializes empty cluster to be the mean of the d
                    # data points farthest from their corresponding means
                    if i in old_kills:
                        d = old_kills[i] - 1
                        if d == 0:
                            d = 50
                        new_kills[i] = d
                    else:
                        d = 5
                    mu[i, :] = 0
                    for j in xrange(d):
                        idx = numpy.argmax(min_dists)
                        min_dists[idx] = 0
                        # chose point idx
                        mu[i, :] += X[idx, :]
                        blacklist.append(idx)
                    mu[i, :] /= float(d)
                    # cluster i was empty, reset it to d far out data
                    # points recomputing distances for this cluster
                    dists[:, i] = numpy.square((X - mu[i, :])).sum(axis=1)
                    min_dists = dists.min(axis=1)
                    for idx in blacklist:
                        min_dists[idx] = 0
                    min_dist_inds = dists.argmin(axis=1)
                    # done
                    i += 1
                else:
                    mu[i, :] = numpy.mean(X[b, :], axis=0)
                    if contains_nan(mu):
                        logger.info('nan found at {0}'.format(i))
                        return X
                    i += 1

            old_kills = new_kills

            iter += 1
    self.mu = sharedX(mu)
    self._params = [self.mu]
    return True
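# The per-cluster distance loop above, vectorized with broadcasting;
# equivalent up to floating-point rounding, at the cost of an (n, k, m)
# temporary:
import numpy as np

def all_pairs_sq_dists(X, mu):
    # (n, 1, m) - (1, k, m) -> (n, k, m); sum over the feature axis
    return ((X[:, None, :] - mu[None, :, :]) ** 2).sum(axis=-1)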
def __init__(self, which_set, center=False, rescale=False, gcn=None,
             one_hot=None, start=None, stop=None, axes=('b', 'c', 0, 1),
             preprocessor=None, noise_v=0.):
    # note: there is no such thing as the cifar10 validation set;
    # pylearn1 defined one but really it should be user-configurable
    # (as it is here)
    self.noise_v = noise_v
    self.axes = axes

    # we define here:
    dtype = 'float32'
    ntrain = 288
    nvalid = 0  # artefact, we won't use it
    ntest = 72

    # we also expose the following details:
    self.img_shape = (1, 32, 32)
    self.img_size = N.prod(self.img_shape)
    self.n_classes = 36
    # NOTE: only the first two entries look like VIDTIMIT speaker IDs;
    # the rest appear to be CIFAR-10 leftovers
    self.label_names = ['fadg0', 'fcft0', 'bird', 'cat', 'deer', 'dog',
                        'frog', 'horse', 'ship', 'truck']

    # prepare loading
    '''
    fnames = ['data_batch_%i' % i for i in range(1, 6)]
    lenx = N.ceil((ntrain + nvalid) / 10000.)*10000
    x = N.zeros((lenx, self.img_size), dtype=dtype)
    y = N.zeros((lenx, 1), dtype=dtype)

    # load train data
    nloaded = 0
    for i, fname in enumerate(fnames):
        data = CIFAR10._unpickle(fname)
        x[i*10000:(i+1)*10000, :] = data['data']
        y[i*10000:(i+1)*10000, 0] = data['labels']
        nloaded += 10000
        if nloaded >= ntrain + nvalid + ntest:
            break
    '''
    path = os.path.join(serial.preprocess('${VIDTIMIT}'),
                        'data', 'cut_dataset.pkl')
    dataset = cPickle.load(file(path))

    # process this data
    train_idx = dataset['train_idx'] == 1
    test_idx = dataset['test_idx'] == 1
    if noise_v == 0.:
        v_noise_tr = 0.
        v_noise_te = 0.
    else:
        v_noise_tr = np.random.normal(
            0., noise_v,
            size=dataset['data'][train_idx].reshape(
                (-1, 54 * 32 * 32)).shape)
        v_noise_te = np.random.normal(
            0., noise_v,
            size=dataset['data'][test_idx].reshape(
                (-1, 54 * 32 * 32)).shape)
    Xs = {'train': dataset['data'][train_idx].reshape(
              (-1, 54 * 32 * 32)) + v_noise_tr,
          'test': dataset['data'][test_idx].reshape(
              (-1, 54 * 32 * 32)) + v_noise_te}

    Ys = {'train': dataset['labels'][train_idx],
          'test': dataset['labels'][test_idx]}

    X = N.cast['float32'](Xs[which_set])
    y = Ys[which_set]

    if isinstance(y, list):
        y = np.asarray(y).astype(dtype)

    if which_set == 'test':
        assert y.shape[0] == 72
        y = y.reshape((y.shape[0], 1))

    max_labels = 36
    if one_hot is not None:
        ynew = np.zeros((y.shape[0], 36))
        for i in range(y.shape[0]):
            # was `ynew[y[i]] = 1`, which filled whole rows indexed by
            # the label instead of setting one-hot entries
            ynew[i, y[i]] = 1
        warnings.warn("the `one_hot` parameter is deprecated. To get "
                      "one-hot encoded targets, request that they "
                      "live in `VectorSpace` through the `data_specs` "
                      "parameter of MNIST's iterator method. "
                      "`one_hot` will be removed on or after "
                      "September 20, 2014.", stacklevel=2)
        y = ynew.astype('int32')

    if center:
        X -= 0.2845
    self.center = center

    if rescale:
        X /= .1644
    self.rescale = rescale

    if start is not None:
        # This needs to come after the prepro so that it doesn't
        # change the pixel means computed above for toronto_prepro
        assert start >= 0
        assert stop > start
        assert stop <= X.shape[0]
        X = X[start:stop, :]
        y = y[start:stop, :]
        assert X.shape[0] == y.shape[0]

    if which_set == 'test':
        assert X.shape[0] == 72

    view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 54),
                                                              axes)

    super(VIDTIMIT, self).__init__(X=X, y=y,
                                   view_converter=view_converter,
                                   y_labels=self.n_classes)

    assert not contains_nan(self.X)

    if preprocessor:
        preprocessor.apply(self)
def __init__(self, which_set, center=False, rescale=False, gcn=None, one_hot=None, start=None, stop=None, axes=('b', 0, 1, 'c'), toronto_prepro = False, preprocessor = None, two_image=False): # note: there is no such thing as the cifar10 validation set; # pylearn1 defined one but really it should be user-configurable # (as it is here) self.axes = axes # we define here: dtype = 'uint8' ntrain = 50000 nvalid = 0 # artefact, we won't use it ntest = 10000 # we also expose the following details: self.img_shape = (3, 32, 32) self.img_shape2 = (32, 32,3) self.img_size = N.prod(self.img_shape) self.n_classes = 10 self.label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] # prepare loading fnames = ['data_batch_%i' % i for i in range(1, 6)] lenx = N.ceil((ntrain + nvalid) / 10000.)*10000 x = N.zeros((lenx, self.img_size), dtype=dtype) y = N.zeros((lenx, 1), dtype=dtype) # load train data nloaded = 0 for i, fname in enumerate(fnames): data = CIFAR10._unpickle(fname) x[i*10000:(i+1)*10000, :] = data['data'] y[i*10000:(i+1)*10000, 0] = data['labels'] nloaded += 10000 if nloaded >= ntrain + nvalid + ntest: break # load test data data = CIFAR10._unpickle('test_batch') # 2value image # can not use other option when you use two_image option print x.shape if two_image: from PIL import Image two_value_x = [] self.img_shape = (1, 32, 32) self.img_shape2 = (32, 32,1) for i,pixel in enumerate(x.reshape(50000, 3, 32, 32)): if i % 1000 == 0: print i pixel = np.transpose(pixel, (1,2,0)) test_img = Image.new("RGB",(32,32),(255,0,0)) test_img.putdata([tuple(x.tolist()) for x in pixel.reshape(1024,3)]) two_value_x.append([x for x in test_img.convert("1").getdata()] ) x = np.asarray(two_value_x) # process this data Xs = {'train': x[0:ntrain], 'test': data['data'][0:ntest]} Ys = {'train': y[0:ntrain], 'test': data['labels'][0:ntest]} X = N.cast['float32'](Xs[which_set]) y = Ys[which_set] if isinstance(y, list): y = np.asarray(y).astype(dtype) if which_set == 'test': assert y.shape[0] == 10000 y = y.reshape((y.shape[0], 1)) max_labels = 10 if one_hot is not None: warnings.warn("the `one_hot` parameter is deprecated. To get " "one-hot encoded targets, request that they " "live in `VectorSpace` through the `data_specs` " "parameter of MNIST's iterator method. " "`one_hot` will be removed on or after " "September 20, 2014.", stacklevel=2) if center: X -= 127.5 self.center = center if rescale: X /= 127.5 self.rescale = rescale if toronto_prepro: assert not center assert not gcn X = X / 255. if which_set == 'test': other = CIFAR10(which_set='train') oX = other.X oX /= 255. X = X - oX.mean(axis=0) else: X = X - X.mean(axis=0) self.toronto_prepro = toronto_prepro self.gcn = gcn if gcn is not None: gcn = float(gcn) X = global_contrast_normalize(X, scale=gcn) if start is not None: # This needs to come after the prepro so that it doesn't # change the pixel means computed above for toronto_prepro assert start >= 0 assert stop > start assert stop <= X.shape[0] X = X[start:stop, :] y = y[start:stop, :] assert X.shape[0] == y.shape[0] if which_set == 'test': assert X.shape[0] == 10000 # view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3), # axes) view_converter = dense_design_matrix.DefaultViewConverter(self.img_shape2, axes) super(CIFAR10, self).__init__(X=X, y=y, view_converter=view_converter, y_labels=self.n_classes) assert not contains_nan(self.X) if preprocessor: preprocessor.apply(self)
def test_grad_s(self): "tests that the gradients with respect to s_i are 0 after doing a mean field update of s_i " model = self.model e_step = self.e_step X = self.X assert X.shape[0] == self.m model.test_batch_size = X.shape[0] init_H = e_step.init_H_hat(V=X) init_Mu1 = e_step.init_S_hat(V=X) prev_setting = config.compute_test_value config.compute_test_value = 'off' H, Mu1 = function([], outputs=[init_H, init_Mu1])() config.compute_test_value = prev_setting H = broadcast(H, self.m) Mu1 = broadcast(Mu1, self.m) H = np.cast[config.floatX](self.model.rng.uniform(0., 1., H.shape)) Mu1 = np.cast[config.floatX](self.model.rng.uniform( -5., 5., Mu1.shape)) H_var = T.matrix(name='H_var') H_var.tag.test_value = H Mu1_var = T.matrix(name='Mu1_var') Mu1_var.tag.test_value = Mu1 idx = T.iscalar() idx.tag.test_value = 0 S = e_step.infer_S_hat(V=X, H_hat=H_var, S_hat=Mu1_var) s_idx = S[:, idx] s_i_func = function([H_var, Mu1_var, idx], s_idx) sigma0 = 1. / model.alpha Sigma1 = e_step.infer_var_s1_hat() mu0 = T.zeros_like(model.mu) #by truncated KL, I mean that I am dropping terms that don't depend on H and Mu1 # (they don't affect the outcome of this test and some of them are intractable ) trunc_kl = - model.entropy_hs(H_hat = H_var, var_s0_hat = sigma0, var_s1_hat = Sigma1) + \ model.expected_energy_vhs(V = X, H_hat = H_var, S_hat = Mu1_var, var_s0_hat = sigma0, var_s1_hat = Sigma1) grad_Mu1 = T.grad(trunc_kl.sum(), Mu1_var) grad_Mu1_idx = grad_Mu1[:, idx] grad_func = function([H_var, Mu1_var, idx], grad_Mu1_idx) for i in xrange(self.N): Mu1[:, i] = s_i_func(H, Mu1, i) g = grad_func(H, Mu1, i) assert not contains_nan(g) g_abs_max = np.abs(g).max() if g_abs_max > self.tol: raise Exception( 'after mean field step, gradient of kl divergence wrt mean field parameter should be 0, but here the max magnitude of a gradient element is ' + str(g_abs_max) + ' after updating s_' + str(i))
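# The property the test above checks, in miniature: after an exact
# coordinate-wise update, the gradient along that coordinate is zero.
# A scalar sketch with f(s) = a*s**2 + b*s (names hypothetical):
a, b = 2.0, -3.0
s = -b / (2.0 * a)       # closed-form minimizer, playing the role of infer_S_hat
grad = 2.0 * a * s + b   # df/ds at the updated value
assert abs(grad) < 1e-12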
def test_grad_h(self): "tests that the gradients with respect to h_i are 0 after doing a mean field update of h_i " model = self.model e_step = self.e_step X = self.X assert X.shape[0] == self.m init_H = e_step.init_H_hat(V=X) init_Mu1 = e_step.init_S_hat(V=X) prev_setting = config.compute_test_value config.compute_test_value = 'off' H, Mu1 = function([], outputs=[init_H, init_Mu1])() config.compute_test_value = prev_setting H = broadcast(H, self.m) Mu1 = broadcast(Mu1, self.m) H = np.cast[config.floatX](self.model.rng.uniform(0., 1., H.shape)) Mu1 = np.cast[config.floatX](self.model.rng.uniform( -5., 5., Mu1.shape)) H_var = T.matrix(name='H_var') H_var.tag.test_value = H Mu1_var = T.matrix(name='Mu1_var') Mu1_var.tag.test_value = Mu1 idx = T.iscalar() idx.tag.test_value = 0 new_H = e_step.infer_H_hat(V=X, H_hat=H_var, S_hat=Mu1_var) h_idx = new_H[:, idx] updates_func = function([H_var, Mu1_var, idx], h_idx) sigma0 = 1. / model.alpha Sigma1 = e_step.infer_var_s1_hat() mu0 = T.zeros_like(model.mu) #by truncated KL, I mean that I am dropping terms that don't depend on H and Mu1 # (they don't affect the outcome of this test and some of them are intractable ) trunc_kl = - model.entropy_hs(H_hat = H_var, var_s0_hat = sigma0, var_s1_hat = Sigma1) + \ model.expected_energy_vhs(V = X, H_hat = H_var, S_hat = Mu1_var, var_s0_hat = sigma0, var_s1_hat = Sigma1) grad_H = T.grad(trunc_kl.sum(), H_var) assert len(grad_H.type.broadcastable) == 2 #from theano.printing import min_informative_str #print min_informative_str(grad_H) #grad_H = Print('grad_H')(grad_H) #grad_H_idx = grad_H[:,idx] grad_func = function([H_var, Mu1_var], grad_H) failed = False for i in xrange(self.N): rval = updates_func(H, Mu1, i) H[:, i] = rval g = grad_func(H, Mu1)[:, i] assert not contains_nan(g) g_abs_max = np.abs(g).max() if g_abs_max > self.tol: #print "new values of H" #print H[:,i] #print "gradient on new values of H" #print g failed = True print 'iteration ', i #print 'max value of new H: ',H[:,i].max() #print 'H for failing g: ' failing_h = H[np.abs(g) > self.tol, i] #print failing_h #from matplotlib import pyplot as plt #plt.scatter(H[:,i],g) #plt.show() #ignore failures extremely close to h=1 high_mask = failing_h > .001 low_mask = failing_h < .999 mask = high_mask * low_mask print 'masked failures: ', mask.shape[0], ' err ', g_abs_max if mask.sum() > 0: print 'failing h passing the range mask' print failing_h[mask.astype(bool)] raise Exception( 'after mean field step, gradient of kl divergence' ' wrt freshly updated variational parameter should be 0, ' 'but here the max magnitude of a gradient element is ' + str(g_abs_max) + ' after updating h_' + str(i))
def __init__(self, which_set, center=False, rescale=False, gcn=None,
             start=None, stop=None, axes=('b', 0, 1, 'c'),
             toronto_prepro=False, preprocessor=None):
    # note: there is no such thing as the cifar10 validation set;
    # pylearn1 defined one but really it should be user-configurable
    # (as it is here)
    self.axes = axes

    # we define here:
    dtype = 'uint8'
    ntrain = 50000
    nvalid = 0  # artefact, we won't use it
    ntest = 10000

    # we also expose the following details:
    self.img_shape = (3, 32, 32)
    self.img_size = numpy.prod(self.img_shape)
    # make sure that this is working (we can also copy it from meta file)
    self.n_classes = 100
    self.label_names = range(1900, 2000)

    import cPickle
    # open in binary mode for unpickling, and avoid shadowing the
    # builtin `dict`
    fo = open('datasets/data_batch', 'rb')
    batch = cPickle.load(fo)
    fo.close()

    lenx = numpy.ceil((ntrain + nvalid) / 10000.) * 10000
    x = numpy.zeros((lenx, self.img_size), dtype=dtype)
    y = numpy.zeros((lenx, 1), dtype=dtype)

    # load train data
    x[0:8305, :] = batch['data']
    y[0:8305, 0] = batch['labels']

    X = numpy.cast['float32'](x[0:8305])
    y = y[0:8305]

    if isinstance(y, list):
        y = numpy.asarray(y).astype(dtype)

    self.center = center
    self.rescale = rescale
    self.toronto_prepro = toronto_prepro

    self.gcn = gcn
    if gcn is not None:
        gcn = float(gcn)
        X = global_contrast_normalize(X, scale=gcn)

    view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                              axes)

    super(Timeliner, self).__init__(X=X, y=y,
                                    view_converter=view_converter,
                                    y_labels=self.n_classes)

    assert not contains_nan(self.X)

    if preprocessor:
        preprocessor.apply(self)
def setup(self, model, dataset):
    """
    Compiles the theano functions needed for the train method.

    Parameters
    ----------
    model : a Model instance
    dataset : Dataset
    """
    if self.cost is None:
        self.cost = model.get_default_cost()

    inf_params = [param for param in model.get_params()
                  if contains_inf(param.get_value())]
    if len(inf_params) > 0:
        raise ValueError("These params are Inf: " + str(inf_params))
    if any([contains_nan(param.get_value())
            for param in model.get_params()]):
        nan_params = [param for param in model.get_params()
                      if contains_nan(param.get_value())]
        raise ValueError("These params are NaN: " + str(nan_params))
    self.model = model

    self._synchronize_batch_size(model)
    model._test_batch_size = self.batch_size
    self.monitor = Monitor.get_monitor(model)
    self.monitor._sanity_check()

    # test if force batch size and batch size
    has_force_batch_size = getattr(model, "force_batch_size", False)
    train_dataset_is_uneven = \
        dataset.get_num_examples() % self.batch_size != 0

    # `self.monitoring_dataset.values() > 0` compared a list against an
    # int; check the number of monitoring datasets instead
    has_monitoring_datasets = \
        self.monitoring_dataset is not None and \
        len(self.monitoring_dataset) > 0

    if has_monitoring_datasets:
        monitoring_datasets_are_uneven = \
            any(d.get_num_examples() % self.batch_size != 0
                for d in self.monitoring_dataset.values())
    else:
        monitoring_datasets_are_uneven = False  # or True it doesn't matter

    if has_force_batch_size and train_dataset_is_uneven and \
       not has_uniform_batch_size(self.train_iteration_mode):
        raise ValueError("Dataset size is not a multiple of batch size. "
                         "You should set train_iteration_mode (and "
                         "maybe monitor_iteration_mode) to "
                         "even_sequential, even_shuffled_sequential or "
                         "even_batchwise_shuffled_sequential")

    if has_force_batch_size and has_monitoring_datasets and \
       monitoring_datasets_are_uneven and \
       not has_uniform_batch_size(self.monitor_iteration_mode):
        raise ValueError("Dataset size is not a multiple of batch size. "
                         "You should set monitor_iteration_mode to "
                         "even_sequential, even_shuffled_sequential or "
                         "even_batchwise_shuffled_sequential")

    data_specs = self.cost.get_data_specs(self.model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

    # Build a flat tuple of Theano Variables, one for each space.
    # We want that so that if the same space/source is specified
    # more than once in data_specs, only one Theano Variable
    # is generated for it, and the corresponding value is passed
    # only once to the compiled Theano function.
    theano_args = []
    for space, source in safe_zip(space_tuple, source_tuple):
        name = '%s[%s]' % (self.__class__.__name__, source)
        arg = space.make_theano_batch(name=name,
                                      batch_size=self.batch_size)
        theano_args.append(arg)
    theano_args = tuple(theano_args)

    # Methods of `self.cost` need args to be passed in a format compatible
    # with data_specs
    nested_args = mapping.nest(theano_args)
    fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
    self.on_load_batch = fixed_var_descr.on_load_batch

    cost_value = self.cost.expr(model, nested_args,
                                ** fixed_var_descr.fixed_vars)

    if cost_value is not None and cost_value.name is None:
        # Concatenate the name of all tensors in theano_args !?
        cost_value.name = 'objective'

    learning_rate = self.learning_rate
    params = list(model.get_params())
    assert len(params) > 0
    for i, param in enumerate(params):
        if param.name is None:
            param.name = 'sgd_params[%d]' % i

    grads, updates = self.cost.get_gradients(model, nested_args,
                                             ** fixed_var_descr.fixed_vars)
    if not isinstance(grads, OrderedDict):
        raise TypeError(str(type(self.cost)) + ".get_gradients returned " +
                        "something with " + str(type(grads)) + " as its " +
                        "first member. Expected OrderedDict.")

    for param in grads:
        assert param in params
    for param in params:
        assert param in grads

    for param in grads:
        if grads[param].name is None and cost_value is not None:
            grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                 {'costname': cost_value.name,
                                  'paramname': param.name})
        assert grads[param].dtype == param.dtype

    lr_scalers = model.get_lr_scalers()

    for key in lr_scalers:
        if key not in params:
            raise ValueError("Tried to scale the learning rate on " +
                             str(key) + " which is not an optimization "
                             "parameter.")

    log.info('Parameter and initial learning rate summary:')
    for param in params:
        param_name = param.name
        if param_name is None:
            param_name = 'anon_param'
        lr = learning_rate.get_value() * lr_scalers.get(param, 1.)
        log.info('\t' + param_name + ': ' + str(lr))

    if self.learning_rule:
        updates.update(self.learning_rule.get_updates(
            learning_rate, grads, lr_scalers))
    else:
        # Use standard SGD updates with fixed learning rate.
        updates.update(dict(safe_zip(params,
            [param - learning_rate * lr_scalers.get(param, 1.) *
             grads[param] for param in params])))

    for param in params:
        if updates[param].name is None:
            updates[param].name = 'sgd_update(' + param.name + ')'
    model.modify_updates(updates)
    for param in params:
        update = updates[param]
        if update.name is None:
            update.name = 'censor(sgd_update(' + param.name + '))'
        for update_val in get_debug_values(update):
            if contains_inf(update_val):
                raise ValueError("debug value of %s contains infs" %
                                 update.name)
            if contains_nan(update_val):
                raise ValueError("debug value of %s contains nans" %
                                 update.name)

    # Set up monitor to model the objective value, learning rate,
    # momentum (if applicable), and extra channels defined by
    # the cost.
    # We have to do that after learning_rule.get_updates has been
    # called, since it may have an effect on
    # learning_rule.add_channels_to_monitor (that is currently the case
    # for AdaDelta and RMSProp).
    self._setup_monitor()

    with log_timing(log, 'Compiling sgd_update'):
        self.sgd_update = function(theano_args,
                                   updates=updates,
                                   name='sgd_update',
                                   on_unused_input='ignore',
                                   mode=self.theano_function_mode)
    self.params = params
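# The compiled sgd_update function applies, to each parameter, the
# fixed-learning-rate step built in the `else` branch above. A minimal
# numpy sketch of the same rule (all names here are illustrative):
import numpy as np

def sgd_step(param, grad, learning_rate, lr_scaler=1.0):
    """One fixed-learning-rate SGD update: param - lr * scaler * grad."""
    return param - learning_rate * lr_scaler * grad

w = np.array([0.5, -0.3])
g = np.array([0.1, -0.2])
w = sgd_step(w, g, learning_rate=0.01)   # -> [0.499, -0.298]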
def __init__(self, which_set, fold=0, image_size=48,
             example_range=None, center=False, scale=False,
             shuffle=False, one_hot=False, rng=None, seed=132987,
             preprocessor=None, axes=('b', 0, 1, 'c')):
    if which_set not in self.mapper.keys():
        raise ValueError("Unrecognized which_set value: %s. Valid values "
                         "are %s." % (str(which_set),
                                      str(self.mapper.keys())))
    assert (fold >= 0) and (fold < 5)

    self.args = locals()

    # load data
    path = '${PYLEARN2_DATA_PATH}/faces/TFD/'
    if image_size == 48:
        data = load(path + 'TFD_48x48.mat')
    elif image_size == 96:
        data = load(path + 'TFD_96x96.mat')
    else:
        raise ValueError("image_size should be either 48 or 96.")

    # retrieve indices corresponding to `which_set` and fold number
    if self.mapper[which_set] == 4:
        set_indices = (data['folds'][:, fold] == 1) + \
                      (data['folds'][:, fold] == 2)
    else:
        set_indices = data['folds'][:, fold] == self.mapper[which_set]
    assert set_indices.sum() > 0

    # limit examples returned to `example_range`
    if example_range:
        ex_range = slice(example_range[0], example_range[1])
    else:
        ex_range = slice(None)

    # get images and cast to float32
    data_x = data['images'][set_indices]
    data_x = np.cast['float32'](data_x)
    data_x = data_x[ex_range]
    # create dense design matrix from topological view
    data_x = data_x.reshape(data_x.shape[0], image_size ** 2)
    if center and scale:
        data_x[:] -= 127.5
        data_x[:] /= 127.5
    elif center:
        data_x[:] -= 127.5
    elif scale:
        data_x[:] /= 255.

    if shuffle:
        rng = make_np_rng(rng, seed, which_method='permutation')
        rand_idx = rng.permutation(len(data_x))
        data_x = data_x[rand_idx]

    # record this before branching so the attribute also exists for the
    # unlabeled set
    self.one_hot = one_hot

    # get labels
    if which_set != 'unlabeled':
        data_y = data['labs_ex'][set_indices]
        data_y = data_y[ex_range] - 1

        data_y_identity = data['labs_id'][set_indices]
        data_y_identity = data_y_identity[ex_range]

        if shuffle:
            data_y = data_y[rand_idx]
            data_y_identity = data_y_identity[rand_idx]

        if one_hot:
            one_hot = np.zeros((data_y.shape[0], 7), dtype='float32')
            for i in xrange(data_y.shape[0]):
                one_hot[i, data_y[i]] = 1.
            data_y = one_hot
    else:
        data_y = None
        data_y_identity = None

    # create view converter for retrieving topological view
    view_converter = dense_design_matrix.DefaultViewConverter(
        (image_size, image_size, 1), axes)

    # init the super class
    super(TFD, self).__init__(X=data_x, y=data_y,
                              view_converter=view_converter)

    assert not contains_nan(self.X)

    self.y_identity = data_y_identity
    self.axes = axes

    if preprocessor is not None:
        preprocessor.apply(self)
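# Usage sketch for the TFD constructor above (assumes PYLEARN2_DATA_PATH is
# set and the TFD .mat files are present; 'train' is assumed to be a valid
# key of self.mapper, matching the which_set check in __init__):
def demo_tfd():
    train = TFD(which_set='train', fold=0, image_size=48,
                center=True, scale=True, shuffle=True)
    assert train.X.shape[1] == 48 ** 2   # flattened 48x48 grayscale images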
def test_grad_h(self):
    """
    Tests that the gradients with respect to h_i are 0 after doing a
    mean field update of h_i.
    """

    model = self.model
    e_step = self.e_step
    X = self.X

    assert X.shape[0] == self.m

    init_H = e_step.init_H_hat(V=X)
    init_Mu1 = e_step.init_S_hat(V=X)

    prev_setting = config.compute_test_value
    config.compute_test_value = 'off'
    H, Mu1 = function([], outputs=[init_H, init_Mu1])()
    config.compute_test_value = prev_setting

    H = broadcast(H, self.m)
    Mu1 = broadcast(Mu1, self.m)

    H = np.cast[config.floatX](self.model.rng.uniform(0., 1., H.shape))
    Mu1 = np.cast[config.floatX](self.model.rng.uniform(-5., 5.,
                                                        Mu1.shape))

    H_var = T.matrix(name='H_var')
    H_var.tag.test_value = H
    Mu1_var = T.matrix(name='Mu1_var')
    Mu1_var.tag.test_value = Mu1
    idx = T.iscalar()
    idx.tag.test_value = 0

    new_H = e_step.infer_H_hat(V=X, H_hat=H_var, S_hat=Mu1_var)
    h_idx = new_H[:, idx]

    updates_func = function([H_var, Mu1_var, idx], h_idx)

    sigma0 = 1. / model.alpha
    Sigma1 = e_step.infer_var_s1_hat()
    mu0 = T.zeros_like(model.mu)

    # by truncated KL, I mean that I am dropping terms that don't depend
    # on H and Mu1 (they don't affect the outcome of this test and some
    # of them are intractable)
    trunc_kl = - model.entropy_hs(H_hat=H_var, var_s0_hat=sigma0,
                                  var_s1_hat=Sigma1) + \
        model.expected_energy_vhs(V=X, H_hat=H_var, S_hat=Mu1_var,
                                  var_s0_hat=sigma0, var_s1_hat=Sigma1)

    grad_H = T.grad(trunc_kl.sum(), H_var)

    assert len(grad_H.type.broadcastable) == 2

    grad_func = function([H_var, Mu1_var], grad_H)

    failed = False

    for i in xrange(self.N):
        rval = updates_func(H, Mu1, i)
        H[:, i] = rval

        g = grad_func(H, Mu1)[:, i]

        assert not contains_nan(g)

        g_abs_max = np.abs(g).max()

        if g_abs_max > self.tol:
            failed = True
            print('iteration ', i)

            failing_h = H[np.abs(g) > self.tol, i]

            # ignore failures where h saturates extremely close to 0 or 1
            high_mask = failing_h > .001
            low_mask = failing_h < .999
            mask = high_mask * low_mask

            print('masked failures: ', mask.shape[0], ' err ', g_abs_max)

            if mask.sum() > 0:
                print('failing h passing the range mask')
                print(failing_h[mask.astype(bool)])
                raise Exception(
                    'after mean field step, gradient of kl divergence'
                    ' wrt freshly updated variational parameter should'
                    ' be 0, but here the max magnitude of a gradient'
                    ' element is ' + str(g_abs_max) +
                    ' after updating h_' + str(i))
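# A toy quadratic shows the property exercised above: an exact
# coordinate-wise update zeroes the gradient of the objective with respect
# to that coordinate. This is an illustrative sketch, not part of the test
# suite; all names are made up:
import numpy as np

def toy_objective_grad(h, a, b):
    """Gradient of f(h) = 0.5 * a * h**2 + b * h, coordinate-wise."""
    return a * h + b

a = np.array([2.0, 4.0])
b = np.array([1.0, -2.0])
h = np.array([0.3, 0.7])
i = 0
h[i] = -b[i] / a[i]   # exact minimization over coordinate i
assert abs(toy_objective_grad(h, a, b)[i]) < 1e-12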
def main():
    """
    .. todo::

        WRITEME
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out")
    parser.add_argument("model_paths", nargs='+')
    options = parser.parse_args()
    model_paths = options.model_paths

    if options.out is not None:
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    print 'generating names...'
    model_names = [model_path.replace('.pkl', '!') for model_path in
                   model_paths]
    model_names = unique_substrings(model_names, min_size=10)
    model_names = [model_name.replace('!', '') for model_name in
                   model_names]
    print '...done'

    channels = {}

    for i, arg in enumerate(model_paths):
        try:
            model = serial.load(arg)
        except Exception:
            if arg.endswith('.yaml'):
                print >> sys.stderr, arg + " is a yaml config file, " + \
                    "you need to load a trained model."
                quit(-1)
            raise
        this_model_channels = model.monitor.channels

        # only add a name postfix when plotting more than one model
        if len(model_paths) > 1:
            postfix = ":" + model_names[i]
        else:
            postfix = ""

        for channel in this_model_channels:
            channels[channel + postfix] = this_model_channels[channel]
        del model
        gc.collect()

    while True:
        # Make a list of short codes for each channel so user can specify
        # them easily
        tag_generator = _TagGenerator()
        codebook = {}
        sorted_codes = []
        for channel_name in sorted(channels,
                                   key=number_aware_alphabetical_key):
            code = tag_generator.get_tag()
            codebook[code] = channel_name
            codebook['<' + channel_name + '>'] = channel_name
            sorted_codes.append(code)

        x_axis = 'example'
        print 'set x_axis to example'

        if len(channels.values()) == 0:
            print "there are no channels to plot"
            break

        # If there is more than one channel in the monitor ask which ones
        # to plot
        prompt = len(channels.values()) > 1

        if prompt:

            # Display the codebook
            for code in sorted_codes:
                print code + '. ' + codebook[code]

            print

            print "Put e, b, s or h in the list somewhere to plot " + \
                "epochs, batches, seconds, or hours, respectively."
            response = raw_input('Enter a list of channels to plot ' + \
                '(example: A, C,F-G, h, <test_err>) or q to quit' + \
                ' or o for options: ')

            if response == 'o':
                print '1: smooth all channels'
                print 'any other response: do nothing, go back to plotting'
                response = raw_input('Enter your choice: ')
                if response == '1':
                    for channel in channels.values():
                        k = 5
                        new_val_record = []
                        for i in xrange(len(channel.val_record)):
                            new_val = 0.
                            count = 0.
                            for j in xrange(max(0, i - k), i + 1):
                                new_val += channel.val_record[j]
                                count += 1.
                            new_val_record.append(new_val / count)
                        channel.val_record = new_val_record
                continue

            if response == 'q':
                break

            # Remove spaces
            response = response.replace(' ', '')

            # Split into list
            codes = response.split(',')

            final_codes = set([])

            for code in codes:
                if code == 'e':
                    x_axis = 'epoch'
                    continue
                elif code == 'b':
                    # 'batche' so that '# ' + x_axis + 's' below reads
                    # '# batches'
                    x_axis = 'batche'
                elif code == 's':
                    x_axis = 'second'
                elif code == 'h':
                    x_axis = 'hour'
                elif code.startswith('<'):
                    assert code.endswith('>')
                    final_codes.add(code)
                elif code.find('-') != -1:
                    # The current list element is a range of codes
                    rng = code.split('-')
                    if len(rng) != 2:
                        print "Input not understood: " + code
                        quit(-1)

                    found = False
                    for i in xrange(len(sorted_codes)):
                        if sorted_codes[i] == rng[0]:
                            found = True
                            break

                    if not found:
                        print "Invalid code: " + rng[0]
                        quit(-1)

                    found = False
                    for j in xrange(i, len(sorted_codes)):
                        if sorted_codes[j] == rng[1]:
                            found = True
                            break

                    if not found:
                        print "Invalid code: " + rng[1]
                        quit(-1)

                    final_codes = final_codes.union(
                        set(sorted_codes[i:j + 1]))
                else:
                    # The current list element is just a single code
                    final_codes = final_codes.union(set([code]))
            # end for code in codes
        else:
            final_codes = set(codebook.keys())

        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        styles = list(colors)
        styles += [color + '--' for color in colors]
        styles += [color + ':' for color in colors]

        fig = plt.figure()
        ax = plt.subplot(1, 1, 1)

        # plot the requested channels
        for idx, code in enumerate(sorted(final_codes)):

            channel_name = codebook[code]
            channel = channels[channel_name]

            y = np.asarray(channel.val_record)

            if contains_nan(y):
                print channel_name + ' contains NaNs'

            if contains_inf(y):
                print channel_name + ' contains infinite values'

            if x_axis == 'example':
                x = np.asarray(channel.example_record)
            elif x_axis == 'batche':
                x = np.asarray(channel.batch_record)
            elif x_axis == 'epoch':
                try:
                    x = np.asarray(channel.epoch_record)
                except AttributeError:
                    # older saved monitors won't have epoch_record
                    x = np.arange(len(channel.batch_record))
            elif x_axis == 'second':
                x = np.asarray(channel.time_record)
            elif x_axis == 'hour':
                x = np.asarray(channel.time_record) / 3600.
            else:
                assert False

            ax.plot(x,
                    y,
                    styles[idx % len(styles)],
                    marker='.',  # add point markers to lines
                    label=channel_name)

        plt.xlabel('# ' + x_axis + 's')
        ax.ticklabel_format(scilimits=(-3, 3), axis='both')

        handles, labels = ax.get_legend_handles_labels()
        lgd = ax.legend(handles, labels, loc='upper center',
                        bbox_to_anchor=(0.5, -0.1))

        # 0.046 is the size of 1 legend box
        fig.subplots_adjust(bottom=0.11 + 0.046 * len(final_codes))

        if options.out is None:
            plt.show()
        else:
            plt.savefig(options.out)

        if not prompt:
            break
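# Invocation sketch for the plotting loop above (assumes this main() lives
# in a plotting script; the script and file names below are placeholders):
#
#     python plot_monitor.py model_1.pkl model_2.pkl
#     python plot_monitor.py --out curves.png model_1.pkl
#
# At the prompt, enter codes such as 'A,C' or a range 'A-D'; add 'e', 'b',
# 's' or 'h' to plot against epochs, batches, seconds or hours, or 'q' to
# quit.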
def __init__(self, which_set, center=False, rescale=False, gcn=None, start=None, stop=None, axes=('b', 0, 1, 'c'), toronto_prepro = False, preprocessor = None): # note: there is no such thing as the cifar10 validation set; # pylearn1 defined one but really it should be user-configurable # (as it is here) self.axes = axes # we define here: dtype = 'uint8' ntrain = 50000 nvalid = 0 # artefact, we won't use it ntest = 10000 # we also expose the following details: self.img_shape = (3, 32, 32) self.img_size = numpy.prod(self.img_shape) self.n_classes = 10 self.label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] # prepare loading fnames = ['data_batch_%i' % i for i in range(1, 6)] datasets = {} datapath = os.path.join( string_utils.preprocess('${PYLEARN2_DATA_PATH}'), 'cifar10', 'cifar-10-batches-py') for name in fnames + ['test_batch']: fname = os.path.join(datapath, name) if not os.path.exists(fname): raise IOError(fname + " was not found. You probably need to " "download the CIFAR-10 dataset by using the " "download script in " "pylearn2/scripts/datasets/download_cifar10.sh " "or manually from " "http://www.cs.utoronto.ca/~kriz/cifar.html") datasets[name] = cache.datasetCache.cache_file(fname) lenx = numpy.ceil((ntrain + nvalid) / 10000.) * 10000 x = numpy.zeros((lenx, self.img_size), dtype=dtype) y = numpy.zeros((lenx, 1), dtype=dtype) # load train data nloaded = 0 for i, fname in enumerate(fnames): _logger.info('loading file %s' % datasets[fname]) data = serial.load(datasets[fname]) x[i * 10000:(i + 1) * 10000, :] = data['data'] y[i * 10000:(i + 1) * 10000, 0] = data['labels'] nloaded += 10000 if nloaded >= ntrain + nvalid + ntest: break # load test data _logger.info('loading file %s' % datasets['test_batch']) data = serial.load(datasets['test_batch']) # process this data Xs = {'train': x[0:ntrain], 'test': data['data'][0:ntest]} Ys = {'train': y[0:ntrain], 'test': data['labels'][0:ntest]} X = numpy.cast['float32'](Xs[which_set]) y = Ys[which_set] if isinstance(y, list): y = numpy.asarray(y).astype(dtype) if which_set == 'test': assert y.shape[0] == 10000 y = y.reshape((y.shape[0], 1)) if center: X -= 127.5 self.center = center if rescale: X /= 127.5 self.rescale = rescale if toronto_prepro: assert not center assert not gcn X = X / 255. if which_set == 'test': other = CIFAR10(which_set='train') oX = other.X oX /= 255. X = X - oX.mean(axis=0) else: X = X - X.mean(axis=0) self.toronto_prepro = toronto_prepro self.gcn = gcn if gcn is not None: gcn = float(gcn) X = global_contrast_normalize(X, scale=gcn) if start is not None: # This needs to come after the prepro so that it doesn't # change the pixel means computed above for toronto_prepro assert start >= 0 assert stop > start assert stop <= X.shape[0] X = X[start:stop, :] y = y[start:stop, :] assert X.shape[0] == y.shape[0] if which_set == 'test': assert X.shape[0] == 10000 view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3), axes) super(CIFAR10, self).__init__(X=X, y=y, view_converter=view_converter, y_labels=self.n_classes) assert not contains_nan(self.X) if preprocessor: preprocessor.apply(self)
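# Usage sketch for the CIFAR10 constructor above (assumes the dataset has
# been downloaded to PYLEARN2_DATA_PATH as described in the IOError
# message; the gcn value is illustrative):
def demo_cifar10():
    train = CIFAR10(which_set='train', gcn=55., start=0, stop=40000)
    valid = CIFAR10(which_set='train', gcn=55., start=40000, stop=50000)
    test = CIFAR10(which_set='test', gcn=55.)
    assert train.X.shape == (40000, 3072)   # 40000 examples, 32*32*3 pixels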
def _execute(self):
    global num_superpixels
    num_output_features = self.num_output_features
    idxs = self.idxs
    top = self.top
    bottom = self.bottom
    left = self.left
    right = self.right
    save_path = self.save_path
    batch_size = self.batch_size
    dataset_family = self.dataset_family
    which_set = self.which_set
    model = self.model
    size = self.size

    nan = 0

    dataset_descriptor = dataset_family[which_set][size]
    dataset = dataset_descriptor.dataset_maker()
    expected_num_examples = dataset_descriptor.num_examples

    full_X = dataset.get_design_matrix()
    num_examples = full_X.shape[0]
    assert num_examples == expected_num_examples

    if self.restrict is not None:
        assert self.restrict[1] <= full_X.shape[0]
        print('restricting to examples', self.restrict[0], 'through',
              self.restrict[1], 'exclusive')
        full_X = full_X[self.restrict[0]:self.restrict[1], :]
        assert self.restrict[1] > self.restrict[0]

    # update for after restriction
    num_examples = full_X.shape[0]
    assert num_examples > 0

    dataset.X = None
    dataset.design_loc = None
    dataset.compress = False

    patchifier = ExtractGridPatches(patch_shape=(size, size),
                                    patch_stride=(1, 1))

    pipeline = serial.load(dataset_descriptor.pipeline_path)
    assert isinstance(pipeline.items[0], ExtractPatches)
    pipeline.items[0] = patchifier

    print('defining features')
    V = T.matrix('V')
    mu = model.mu
    feat = triangle_code(V, mu)
    assert feat.dtype == 'float32'

    print('compiling theano function')
    f = function([V], feat)

    nhid = model.mu.get_value().shape[0]
    if config.device.startswith('gpu') and nhid >= 4000:
        f = halver(f, model.nhid)

    topo_feat_var = T.TensorType(broadcastable=(False, False, False,
                                                False),
                                 dtype='float32')()
    if self.pool_mode == 'mean':
        region_features = function([topo_feat_var],
                                   topo_feat_var.mean(axis=(1, 2)))
    elif self.pool_mode == 'max':
        region_features = function([topo_feat_var],
                                   topo_feat_var.max(axis=(1, 2)))
    else:
        assert False

    def average_pool(stride):
        def point(p):
            return p * ns / stride

        rval = np.zeros((topo_feat.shape[0], stride, stride,
                         topo_feat.shape[3]), dtype='float32')

        for i in xrange(stride):
            for j in xrange(stride):
                rval[:, i, j, :] = region_features(
                    topo_feat[:, point(i):point(i + 1),
                              point(j):point(j + 1), :])

        return rval

    output = np.zeros((num_examples, num_output_features),
                      dtype='float32')

    fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'),
                           view_converter=DefaultViewConverter(
                               [1, 1, nhid]))

    ns = 32 - size + 1
    depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                         patch_shape=(1, 1))

    if len(range(0, num_examples - batch_size + 1, batch_size)) <= 0:
        # the loop below will not run; report the sizes that caused it
        print(num_examples)
        print(batch_size)

    for i in xrange(0, num_examples - batch_size + 1, batch_size):
        print(i)
        t1 = time.time()

        d = copy.copy(dataset)
        d.set_design_matrix(full_X[i:i + batch_size, :])

        t2 = time.time()

        # apply the preprocessing pipeline (patch extraction etc.)
        d.apply_preprocessor(pipeline, can_fit=False)
        X2 = d.get_design_matrix()

        t3 = time.time()

        # run the compiled theano feature function
        feat = f(X2)

        t4 = time.time()

        assert feat.dtype == 'float32'

        feat_dataset = copy.copy(fd)

        if contains_nan(feat):
            nan += np.isnan(feat).sum()
            feat[np.isnan(feat)] = 0

        feat_dataset.set_design_matrix(feat)

        # reassemble features into a topological view
        feat_dataset.apply_preprocessor(depatchifier)
        topo_feat = feat_dataset.get_topological_view()
        assert topo_feat.shape[0] == batch_size

        t5 = time.time()

        # average pooling over superpixels
        superpixels = average_pool(num_superpixels)

        assert batch_size == 1

        if self.pool_mode == 'mean':
            for j in xrange(num_output_features):
                output[i:i + batch_size, j] = superpixels[
                    :, top[j]:bottom[j] + 1, left[j]:right[j] + 1,
                    idxs[j]].mean()
        elif self.pool_mode == 'max':
            for j in xrange(num_output_features):
                output[i:i + batch_size, j] = superpixels[
                    :, top[j]:bottom[j] + 1, left[j]:right[j] + 1,
                    idxs[j]].max()
        else:
            assert False

        assert output[i:i + batch_size, :].max() < 1e20

        t6 = time.time()

        print((t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5))

    if self.chunk_size is not None:
        assert save_path.endswith('.npy')
        save_path_pieces = save_path.split('.npy')
        assert len(save_path_pieces) == 2
        assert save_path_pieces[1] == ''
        save_path = save_path_pieces[0] + '_' + \
            chr(ord('A') + self.chunk_id) + '.npy'
    np.save(save_path, output)

    if nan > 0:
        warnings.warn(str(nan) + ' features were nan')
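# What average_pool computes above, in a self-contained numpy sketch: the
# (ns x ns) grid of patch features is split into stride x stride
# superpixel regions and each region is mean-pooled. Names here are
# illustrative, not part of the original module:
import numpy as np

def average_pool_sketch(topo_feat, stride):
    b, ns, _, c = topo_feat.shape

    def point(p):
        return p * ns // stride

    rval = np.zeros((b, stride, stride, c), dtype=topo_feat.dtype)
    for i in range(stride):
        for j in range(stride):
            rval[:, i, j, :] = topo_feat[
                :, point(i):point(i + 1),
                point(j):point(j + 1), :].mean(axis=(1, 2))
    return rval

feats = np.random.rand(1, 27, 27, 8).astype('float32')
pooled = average_pool_sketch(feats, stride=3)   # -> shape (1, 3, 3, 8)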
def __init__(
    self,
    lfw_path,
    filelist_path,
    embedding_file=None,
    center=False,
    scale=False,
    start=None,
    stop=None,
    gcn=None,
    shuffle=False,
    rng=None,
    seed=132987,
    axes=("b", 0, 1, "c"),
    img_shape=(3, 250, 250),
):
    self.axes = axes
    self.img_shape = img_shape
    C, H, W = img_shape
    self.img_size = np.prod(self.img_shape)
    files = []
    with open(filelist_path, "r") as filelist_f:
        files = [line.strip() for line in filelist_f]

    # Load raw pixel integer values
    dtype = "uint8"
    X = np.zeros((len(files), W, H, C), dtype=dtype)
    img_ids = []
    for i, line in enumerate(files):
        if "\t" in line:
            # New format: contains image IDs
            img_path, img_id = line.strip().split()
            img_ids.append(int(img_id))
        else:
            img_path = line.strip()

        full_path = os.path.join(lfw_path, img_path)
        im = image.load(full_path, rescale_image=False, dtype=dtype)

        # Handle grayscale images, which may not have RGB channels;
        # repeat the single channel 3 times along axis 2
        if len(im.shape) == 2:
            h, w = im.shape
            im = im.reshape(h, w, 1).repeat(3, 2)

        X[i] = im

    # Cast to float32, center / scale if necessary
    X = np.cast["float32"](X)

    # Create dense design matrix from topological view
    X = X.reshape(X.shape[0], -1)

    # Prepare img_ids
    if embedding_file is not None:
        if len(img_ids) != len(files):
            raise ValueError("You must provide a filelist with indexes "
                             "into the embedding array for each image.")
        img_ids = np.array(img_ids, dtype="uint32")

    if center and scale:
        X[:] -= 127.5
        X[:] /= 127.5
    elif center:
        X[:] -= 127.5
    elif scale:
        X[:] /= 255.0

    self.gcn = gcn
    if gcn is not None:
        gcn = float(gcn)
        X = global_contrast_normalize(X, scale=gcn)

    if shuffle:
        rng = make_np_rng(rng, seed, which_method="permutation")
        rand_idx = rng.permutation(len(X))
        X = X[rand_idx]
        if len(img_ids) > 0:
            img_ids = img_ids[rand_idx]

    if start is not None:
        assert start >= 0
        assert stop > start
        assert stop <= X.shape[0]
        X = X[start:stop]
        if len(img_ids) > 0:
            img_ids = img_ids[start:stop]

    # Load embeddings if provided
    Y = None
    if embedding_file is not None:
        embeddings = np.load(embedding_file)["arr_0"]
        assert embeddings.shape[0] >= len(files)

        Y = embeddings[img_ids].astype(theano.config.floatX)

    # Create view converter for retrieving the topological view
    self.view_converter = dense_design_matrix.DefaultViewConverter(
        (W, H, C), axes)

    # Init super class
    super(LFW, self).__init__(X=X, y=Y)

    assert not contains_nan(self.X)

    # Another hack: rename 'targets' to match model expectations
    if embedding_file is not None:
        space, (X_source, y_source) = self.data_specs
        self.data_specs = (space, (X_source, "condition"))
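# Usage sketch for the LFW constructor above (the paths are placeholders;
# the embedding .npz is expected to store its array under 'arr_0', as
# loaded in __init__, and each filelist line to carry a tab-separated
# image ID):
def demo_lfw():
    ds = LFW(lfw_path='/data/lfw',
             filelist_path='/data/lfw/filelist.txt',
             embedding_file='/data/lfw/embeddings.npz',
             scale=True, shuffle=True)
    assert not contains_nan(ds.X)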