def _get_one_hot(fname, csv, lb_sheet): assert isinstance(csv, pd.DataFrame) and isinstance(lb_sheet, pd.DataFrame) labels = csv.loc[csv[du.FNAME] == fname][du.LABEL] assert len(labels) == 1 label = list(labels)[0] index = list(lb_sheet.loc[lb_sheet[du.LABEL] == label]['index'])[0] return misc.convert_to_one_hot([index], 41)
def load_as_tframe_data(cls, data_dir, file_name=None, permute=False,
                        permute_mark='alpha', **kwargs):
    """Return (permuted) MNIST as a SequenceSet, creating it if not cached.

    If the target file already exists under `data_dir` it is loaded and
    returned as is. Otherwise the raw arrays are loaded through
    `MNIST.load_as_numpy_arrays`, turned into one sequence per image, saved
    to the target file and returned.

    :param data_dir: directory holding both the raw data and the cached file.
    :param file_name: name of the cached file. When None it is built from
                      `cls._get_file_name(permute, permute_mark)` + '.tfds'.
    :param permute: whether to shuffle the pixel order of every image.
    :param permute_mark: only used to build the default file name.
    :param kwargs: accepted but not used here.
    :return: a SequenceSet named 'pMNIST' with one-hot 'targets'.
    """
    # Resolve the path of the cached data set
    if file_name is None:
        file_name = cls._get_file_name(permute, permute_mark) + '.tfds'
    data_path = os.path.join(data_dir, file_name)

    # Cache hit, nothing has to be created
    if os.path.exists(data_path):
        return SequenceSet.load(data_path)

    # Cache miss, build the data set from the raw arrays
    console.show_status('Creating data ...')
    images, labels = MNIST.load_as_numpy_arrays(data_dir)

    # One scalar per step, scaled by 1/255.
    # Original note: images (70000, 784, 1), np.float64
    images = images.reshape(images.shape[0], -1, 1) / 255.

    if permute:
        # Move the pixel axis to the front so that the shuffle reorders
        # pixels (identically for every image), then move it back.
        # NOTE(review): the permutation is drawn from the global numpy RNG
        # without a seed here - confirm this is intended.
        pixel_major = np.swapaxes(images, 0, 1)
        pixel_major = np.random.permutation(pixel_major)
        images = np.swapaxes(pixel_major, 0, 1)

    # Original note: labels (70000, 10), np.float64
    labels = convert_to_one_hot(labels, 10)

    # Wrap data into a Sequence Set, one list entry per image
    data_set = SequenceSet(
        list(images), summ_dict={'targets': list(labels)}, n_to_one=True,
        name='pMNIST')

    console.show_status('Saving data set ...')
    data_set.save(data_path)
    console.show_status('Data set saved to `{}`'.format(data_path))
    return data_set
def load(cls, data_dir, train_size, validate_size, test_size,
         flatten=False, one_hot=True):
    """Load the data set and split it into train / validation / test parts.

    :param data_dir: directory passed on to `cls.load_as_tframe_data`.
    :param train_size: forwarded to `cls._split_and_return`.
    :param validate_size: forwarded to `cls._split_and_return`.
    :param test_size: forwarded to `cls._split_and_return`.
    :param flatten: if True, features are reshaped to (data_set.size, -1).
    :param one_hot: if True, targets are converted with
                    `misc.convert_to_one_hot` using the data set's
                    NUM_CLASSES entry.
    :return: whatever `cls._split_and_return` returns.
    """
    data_set = cls.load_as_tframe_data(data_dir)

    # Optionally collapse every sample into a single vector
    if flatten:
        flat_features = data_set.features.reshape(data_set.size, -1)
        data_set.features = flat_features

    # Optionally switch to one-hot targets
    if one_hot:
        raw_targets = data_set.targets
        num_classes = data_set[data_set.NUM_CLASSES]
        data_set.targets = misc.convert_to_one_hot(raw_targets, num_classes)

    return cls._split_and_return(
        data_set, train_size, validate_size, test_size)
def load_as_tframe_data(cls, data_dir, file_name=None, raw_data_dir=None,
                        force_create=False):
    """Return TIMIT25 as a SignalSet, creating it unless a cached file exists.

    :param data_dir: directory of the cached file (and, by default, of the
                     'TIMIT25' raw data folder).
    :param file_name: name of the cached file, 'timit-25.tfds' when None.
    :param raw_data_dir: directory of the raw data. Defaults to
                         `<data_dir>/TIMIT25`.
    :param force_create: if True, the cached file is neither read nor
                         written and the data set is always rebuilt.
    :return: a SignalSet named 'TIMIT25' whose GROUPS property lists, per
             class, the indices of the signals belonging to that class.
    """
    # Resolve the path of the cached data set
    if file_name is None:
        file_name = 'timit-25.tfds'
    data_path = os.path.join(data_dir, file_name)

    # Reuse the cached file unless a rebuild was requested
    if os.path.exists(data_path) and not force_create:
        return SignalSet.load(data_path)

    # Load the raw arrays
    console.show_status('Loading data ...')
    if raw_data_dir is None:
        raw_data_dir = os.path.join(data_dir, 'TIMIT25')
    data_dict, sr = cls.load_as_numpy_arrays(raw_data_dir)

    console.show_status('Wrapping data into signal set ...')
    signals, targets, groups = [], [], []
    for class_id, word in enumerate(cls.PROPERTIES[pedia.classes]):
        # Every signal of this class shares the same one-hot target
        one_hot = misc.convert_to_one_hot(
            [class_id], cls.PROPERTIES[SignalSet.NUM_CLASSES])

        # The signals of a class occupy a contiguous index range
        first = len(signals)
        signals.extend(Signal(array, sr) for array in data_dict[word])
        members = list(range(first, len(signals)))

        targets.extend([one_hot] * len(members))
        groups.append(members)

    data_set = SignalSet(
        signals, summ_dict={'targets': targets}, n_to_one=True,
        name='TIMIT25', converter=cls.converter, **cls.PROPERTIES)
    data_set.properties[data_set.GROUPS] = groups
    data_set.batch_preprocessor = cls.preprocessor

    # A forced rebuild is not written back to disk
    if not force_create:
        data_set.save(data_path)
        console.show_status('Data set saved to `{}`'.format(data_path))
    return data_set
def merge_classes(self, *class_indices):
    """Merge the given classes into one class placed at the end of the list.

    The groups and class names at `class_indices` are removed from
    `self.properties` and replaced by one merged entry appended at the end.
    NUM_CLASSES and `self.targets` are then rebuilt from the new groups.

    The merged name joins the original names with '/', highest index first
    (e.g. merging classes 1 and 3 yields 'name3/name1').

    :param class_indices: indices of the classes to merge. Any number of
                          indices is accepted; repeated indices are counted
                          once. Calling without indices is a no-op.
    :raises IndexError: if an index is out of range (raised by `list.pop`).
    """
    # Nothing to merge: do not append an empty, unnamed class
    if not class_indices:
        return

    # Read before properties and targets are modified below
    target_is_onehot = self.target_is_onehot

    # Update group and class name. Indices are popped from the highest one
    # down so that a pop never shifts a position which still has to be
    # removed; duplicates are dropped so that a repeated index can not remove
    # an unrelated neighbouring class.
    indices = []
    names_to_merge = []
    for i in sorted(set(class_indices), reverse=True):
        indices.extend(self.properties[self.GROUPS].pop(i))
        names_to_merge.append(self.properties[pedia.classes].pop(i))
    self.properties[self.GROUPS].append(indices)
    self.properties[pedia.classes].append('/'.join(names_to_merge))

    # Set num classes
    self.properties[self.NUM_CLASSES] = len(self.properties[self.GROUPS])

    # Generate labels: each sample gets the position of its group
    labels = np.zeros(shape=self.size, dtype=int)
    for i, group in enumerate(self.groups):
        # An explicit integer dtype keeps an empty group a valid index array
        labels[np.array(group, dtype=int)] = i

    # Set targets, keeping the encoding that was in use before the merge
    if target_is_onehot:
        self.targets = misc.convert_to_one_hot(labels, self.num_classes)
    else:
        self.targets = labels
def load_as_tframe_data(cls, data_dir, file_name=None, rgb=True,
                        permute=False, permute_mark='alpha', **kwargs):
    """Return sequential CIFAR-10 as a SequenceSet, creating it if not cached.

    If the target file already exists under `data_dir` it is loaded and
    returned as is. Otherwise the raw arrays are loaded through
    `CIFAR10.load_as_numpy_arrays`, every image is turned into a sequence of
    32 * 32 pixels, and the result is saved to the target file.

    :param data_dir: directory holding both the raw data and the cached file.
    :param file_name: name of the cached file. When None it is built from
                      `cls._get_file_name(rgb, permute, permute_mark)` +
                      '.tfds'.
    :param rgb: must be True, grayscale is not supported yet.
    :param permute: must be False, permutation is not supported yet.
    :param permute_mark: only used to build the default file name.
    :param kwargs: accepted but not used here.
    :return: a SequenceSet named 'sCIFAR10' with one-hot 'targets'.
    :raises AssertionError: if `rgb` is False or `permute` is True.
    """
    assert rgb and not permute

    # Check file name
    if file_name is None:
        file_name = cls._get_file_name(rgb, permute, permute_mark) + '.tfds'
    data_path = os.path.join(data_dir, file_name)
    if os.path.exists(data_path):
        return SequenceSet.load(data_path)

    # If data does not exist, create a new data set
    console.show_status('Creating data ...')
    images, labels = CIFAR10.load_as_numpy_arrays(data_dir)

    # Original note: images (60000, 32, 32, 3), np.float64.
    # The sample count is taken from the array itself instead of being
    # hard-coded, so the reshape does not depend on exactly 60000 images.
    images = images.reshape(
        images.shape[0], 32 * 32, 3 if rgb else 1) / 255.

    # Permutation is not implemented; this still guards the case where the
    # assertion above has been stripped (python -O)
    if permute:
        raise NotImplementedError

    # Original note: labels (60000, 10), np.int32
    labels = convert_to_one_hot(labels, 10)

    # Wrap data into a Sequence Set, one list entry per image
    features = list(images)
    targets = list(labels)
    data_set = SequenceSet(features, summ_dict={'targets': targets},
                           n_to_one=True, name='sCIFAR10')

    console.show_status('Saving data set ...')
    data_set.save(data_path)
    console.show_status('Data set saved to `{}`'.format(data_path))
    return data_set
def generate(self, z=None, sample_num=1, labels=None):
    """Generate samples with the generator.

    :param z: numpy array with shape (None, z_dim). If provided, sample_num
              is ignored. Otherwise z is created by `self._random_z` with
              `sample_num` samples.
    :param sample_num: positive integer. It is overridden by
                       FLAGS.sample_num when that flag is positive, and by
                       the number of labels when labels are provided to a
                       conditional model.
    :param labels: None or a list of class indices. If z is provided as
                   well, it must have length z.shape[0]. If labels is None
                   and self is a conditional model, labels are generated
                   randomly. Ignored by non-conditional models.
    :return: the result of running `self._outputs` in the model's session.
    :raises ValueError: if the model has not been built, if the shape of z
                        does not match the generator's input, or if z and
                        labels stand for different numbers of samples.

    Examples:  model.generate(labels=[1, 4, 5])
               model.generate(sample_num=10)
               # labels is a list with len(labels) == z.shape[0]; it has to
               # be passed by keyword since the 2nd positional parameter is
               # sample_num
               model.generate(z, labels=labels)
    """
    # Check model and session
    if self._G is None:
        raise ValueError('Model not built yet')
    if self._session is None:
        self.launch_model(overwrite=False)
    assert isinstance(self._session, tf.Session)

    # Get sample number
    sample_num = (FLAGS.sample_num if FLAGS.sample_num > 0
                  else max(1, sample_num))
    if self._conditional and labels is not None:
        labels = misc.convert_to_one_hot(labels, self._classes)
        sample_num = labels.shape[0]

    # Check input z
    z = self._random_z(sample_num) if z is None else z
    sample_num = z.shape[0]
    z_shape = list(z.shape[1:])
    g_input_shape = self.G.inputs[0].get_shape().as_list()[1:]
    if z_shape != g_input_shape:
        raise ValueError("Shape of input z {} doesn't match the shape of "
                         "generator's input {}".format(
                             z_shape, g_input_shape))

    # Check labels
    if self._conditional:
        # If labels is not None, they have already been converted
        if labels is None:
            labels = self._random_labels(sample_num)
        # Make sure z and one-hot labels can be concatenated
        if labels.shape[0] != sample_num:
            raise ValueError(
                '!! Provided z and labels should stand for same '
                'number of samples but {} != {}'.format(
                    sample_num, labels.shape[0]))

    # Generate samples
    feed_dict = {self.G.inputs[0]: z}
    if self._conditional:
        feed_dict[self._targets] = labels
    feed_dict.update(self._get_status_feed_dict(is_training=False))

    samples = self._session.run(self._outputs, feed_dict=feed_dict)
    return samples
def _random_labels(self, sample_num):
    """Draw `sample_num` random class labels and return them one-hot encoded.

    Only valid for conditional models.

    :param sample_num: number of labels to draw.
    :return: the result of `misc.convert_to_one_hot` for the drawn labels.
    """
    # Labels only make sense for a conditional model
    assert self._conditional

    # Class indices are drawn from [0, self._classes)
    random_ids = np.random.randint(self._classes, size=sample_num)
    one_hot = misc.convert_to_one_hot(random_ids, self._classes)
    return one_hot