Example #1
0
def _get_one_hot(fname, csv, lb_sheet):
    """Return the one-hot encoded label of the file named `fname`.

    :param fname: value looked up in the `du.FNAME` column of `csv`;
                  exactly one row must match it
    :param csv: pandas DataFrame providing the `du.FNAME` and `du.LABEL`
                columns
    :param lb_sheet: pandas DataFrame providing the `du.LABEL` and 'index'
                     columns, used to translate a label into a class index
    :return: whatever `misc.convert_to_one_hot` produces for the single
             class index with 41 classes
    """
    assert isinstance(csv, pd.DataFrame) and isinstance(lb_sheet, pd.DataFrame)

    # The csv must describe this file exactly once
    matched_labels = csv.loc[csv[du.FNAME] == fname][du.LABEL]
    assert len(matched_labels) == 1
    label = list(matched_labels)[0]

    # Translate the label into its class index through the label sheet
    sheet_rows = lb_sheet.loc[lb_sheet[du.LABEL] == label]
    class_index = list(sheet_rows['index'])[0]

    return misc.convert_to_one_hot([class_index], 41)
Example #2
0
 def load_as_tframe_data(cls,
                         data_dir,
                         file_name=None,
                         permute=False,
                         permute_mark='alpha',
                         **kwargs):
     """Load the sequential MNIST data set, creating and saving it if needed.

     :param data_dir: directory holding the raw data and the cached file
     :param file_name: name of the cached file; when None it is derived from
                       `cls._get_file_name(permute, permute_mark)` + '.tfds'
     :param permute: whether to shuffle the pixel order of every image
     :param permute_mark: forwarded to `cls._get_file_name`
     :param kwargs: accepted but not used here
     :return: the SequenceSet loaded from disk or the newly created one
     """
     if file_name is None:
         file_name = cls._get_file_name(permute, permute_mark) + '.tfds'
     data_path = os.path.join(data_dir, file_name)

     # Reuse the cached data set whenever it is available
     if os.path.exists(data_path):
         return SequenceSet.load(data_path)

     # Otherwise build the data set from the raw arrays
     console.show_status('Creating data ...')
     images, labels = MNIST.load_as_numpy_arrays(data_dir)
     # One pixel per step: (num_images, num_pixels, 1), scaled by 1/255
     num_images = images.shape[0]
     images = images.reshape(num_images, -1, 1) / 255.

     if permute:
         # np.random.permutation shuffles along the first axis, so the pixel
         # axis is moved to the front, shuffled, and then moved back
         pixel_major = np.swapaxes(images, 0, 1)
         pixel_major = np.random.permutation(pixel_major)
         images = np.swapaxes(pixel_major, 0, 1)

     labels = convert_to_one_hot(labels, 10)

     # Wrap one feature sequence and one target per image into a SequenceSet
     features = list(images)
     targets = list(labels)
     data_set = SequenceSet(features,
                            summ_dict={'targets': targets},
                            n_to_one=True,
                            name='pMNIST')

     console.show_status('Saving data set ...')
     data_set.save(data_path)
     console.show_status('Data set saved to `{}`'.format(data_path))
     return data_set
Example #3
0
    def load(cls,
             data_dir,
             train_size,
             validate_size,
             test_size,
             flatten=False,
             one_hot=True):
        """Load the data set and split it into train/validation/test parts.

        :param data_dir: forwarded to `cls.load_as_tframe_data`
        :param train_size: forwarded to `cls._split_and_return`
        :param validate_size: forwarded to `cls._split_and_return`
        :param test_size: forwarded to `cls._split_and_return`
        :param flatten: if True, features are reshaped to (data_set.size, -1)
        :param one_hot: if True, targets are passed through
                        `misc.convert_to_one_hot`
        :return: whatever `cls._split_and_return` returns
        """
        data_set = cls.load_as_tframe_data(data_dir)

        # Optionally collapse every sample into a single feature vector
        if flatten:
            num_samples = data_set.size
            data_set.features = data_set.features.reshape(num_samples, -1)

        # Optionally replace the targets with their one-hot encoding
        if one_hot:
            num_classes = data_set[data_set.NUM_CLASSES]
            data_set.targets = misc.convert_to_one_hot(
                data_set.targets, num_classes)

        splits = cls._split_and_return(
            data_set, train_size, validate_size, test_size)
        return splits
Example #4
0
 def load_as_tframe_data(cls,
                         data_dir,
                         file_name=None,
                         raw_data_dir=None,
                         force_create=False):
     """Load the TIMIT25 signal set, creating it from raw data if needed.

     :param data_dir: directory of the cached data set file
     :param file_name: cached file name, 'timit-25.tfds' if None
     :param raw_data_dir: directory of the raw data; defaults to the
                          'TIMIT25' sub-directory of `data_dir`
     :param force_create: if True, the cached file is ignored and the newly
                          created data set is NOT written back to disk
     :return: the SignalSet loaded from disk or the newly created one
     """
     if file_name is None:
         file_name = 'timit-25.tfds'
     data_path = os.path.join(data_dir, file_name)
     use_cache = not force_create
     if use_cache and os.path.exists(data_path):
         return SignalSet.load(data_path)

     # No usable cache, read the raw arrays
     console.show_status('Loading data ...')
     if raw_data_dir is None:
         raw_data_dir = os.path.join(data_dir, 'TIMIT25')
     data_dict, sr = cls.load_as_numpy_arrays(raw_data_dir)

     console.show_status('Wrapping data into signal set ...')
     signals, targets, groups = [], [], []
     for class_id, word in enumerate(cls.PROPERTIES[pedia.classes]):
         # All signals of one word share the same one-hot target object
         one_hot = misc.convert_to_one_hot(
             [class_id], cls.PROPERTIES[SignalSet.NUM_CLASSES])
         first = len(signals)
         signals.extend(Signal(array, sr) for array in data_dict[word])
         count = len(signals) - first
         targets.extend([one_hot] * count)
         # A group lists the positions (in `signals`) of one class
         groups.append(list(range(first, first + count)))

     data_set = SignalSet(signals,
                          summ_dict={'targets': targets},
                          n_to_one=True,
                          name='TIMIT25',
                          converter=cls.converter,
                          **cls.PROPERTIES)
     data_set.properties[data_set.GROUPS] = groups
     data_set.batch_preprocessor = cls.preprocessor

     # Only a data set created in the regular (non-forced) way is cached
     if use_cache:
         data_set.save(data_path)
         console.show_status('Data set saved to `{}`'.format(data_path))
     return data_set
Example #5
0
 def merge_classes(self, *class_indices):
     """Merge the given classes into one class put at the end of class list.

     The groups of the selected classes are concatenated into a single
     group and their names are joined with '/' (in descending index order).
     Both are appended to the end of the corresponding lists, the number of
     classes is updated and the targets are regenerated from the groups.

     :param class_indices: indices of the classes to merge. Repeated indices
                           are counted once.
     :raises ValueError: if no class index is provided
     """
     # Read the target format before anything is modified
     target_is_onehot = self.target_is_onehot

     # Each class can be popped only once: a repeated index would otherwise
     # remove an unrelated class that has shifted into that position
     unique_indices = sorted(set(class_indices), reverse=True)
     if not unique_indices:
         # Fail before touching any state instead of appending an empty class
         raise ValueError('!! At least one class index should be provided')

     # Update group and class name. Popping from the largest index down
     # keeps the remaining (smaller) indices valid
     groups = self.properties[self.GROUPS]
     class_names = self.properties[pedia.classes]
     indices = []
     names_to_merge = []
     for i in unique_indices:
         indices.extend(groups.pop(i))
         names_to_merge.append(class_names.pop(i))
     groups.append(indices)
     class_names.append('/'.join(names_to_merge))

     # Set num classes
     self.properties[self.NUM_CLASSES] = len(groups)

     # Generate labels
     labels = np.zeros(shape=self.size, dtype=int)
     for i, group in enumerate(self.groups):
         # An explicit integer dtype keeps an empty group a valid (no-op)
         # index; np.array([]) alone is a float array
         labels[np.array(group, dtype=int)] = i

     # Set targets
     if target_is_onehot:
         self.targets = misc.convert_to_one_hot(labels, self.num_classes)
     else:
         self.targets = labels
0
    def load_as_tframe_data(cls,
                            data_dir,
                            file_name=None,
                            rgb=True,
                            permute=False,
                            permute_mark='alpha',
                            **kwargs):
        """Load the sequential CIFAR-10 data set, creating it if needed.

        Only `rgb=True` together with `permute=False` is supported.

        :param data_dir: directory holding the raw data and the cached file
        :param file_name: name of the cached file; when None it is derived
                          from `cls._get_file_name(rgb, permute, permute_mark)`
                          + '.tfds'
        :param rgb: must be True
        :param permute: must be False
        :param permute_mark: forwarded to `cls._get_file_name`
        :param kwargs: accepted but not used here
        :return: the SequenceSet loaded from disk or the newly created one
        """
        assert rgb and not permute

        if file_name is None:
            base_name = cls._get_file_name(rgb, permute, permute_mark)
            file_name = base_name + '.tfds'
        data_path = os.path.join(data_dir, file_name)

        # Reuse the cached data set whenever it is available
        if os.path.exists(data_path):
            return SequenceSet.load(data_path)

        # Otherwise build the data set from the raw arrays
        console.show_status('Creating data ...')
        images, labels = CIFAR10.load_as_numpy_arrays(data_dir)
        # One pixel per step: (60000, 1024, channels), scaled by 1/255
        channels = 3 if rgb else 1
        images = images.reshape(60000, 32 * 32, channels) / 255.

        # Permutation is not supported for this data set
        if permute:
            raise NotImplementedError

        labels = convert_to_one_hot(labels, 10)

        # Wrap one feature sequence and one target per image
        features = list(images)
        targets = list(labels)
        data_set = SequenceSet(features,
                               summ_dict={'targets': targets},
                               n_to_one=True,
                               name='sCIFAR10')

        console.show_status('Saving data set ...')
        data_set.save(data_path)
        console.show_status('Data set saved to `{}`'.format(data_path))
        return data_set
Example #7
0
  def generate(self, z=None, sample_num=1, labels=None):
    """
    Generate samples with the generator.

    :param z: numpy array with shape (None, z_dim). If provided, sample_num
               will be ignored. Otherwise it will be generated randomly with
               shape (sample_num, z_dim)
    :param sample_num: positive integer. Overridden by FLAGS.sample_num
                        whenever that flag is positive
    :param labels: Only used if self is a conditional model. If z is
                    provided, labels should be None or a list with length
                    z.shape[0]. If labels is None, they will be generated
                    randomly.
    :return: Result of running self._outputs in the session
             (presumably samples shaped like the model output)

    Examples:  model.generate(labels=[1, 4, 5])

               model.generate(sample_num=10)

               # Here labels is a list
               model.generate(z, labels)
               assert len(labels) == z.shape[0]
    """
    # The model must be built, a session is launched on demand
    if self._G is None:
      raise ValueError('Model not built yet')
    if self._session is None:
      self.launch_model(overwrite=False)
    assert isinstance(self._session, tf.Session)

    # Decide how many samples to generate
    if FLAGS.sample_num > 0:
      sample_num = FLAGS.sample_num
    else:
      sample_num = max(1, sample_num)
    # Provided labels determine the sample number of a conditional model
    if self._conditional and labels is not None:
      labels = misc.convert_to_one_hot(labels, self._classes)
      sample_num = labels.shape[0]

    # Prepare z and make sure it fits the generator input
    if z is None:
      z = self._random_z(sample_num)
    sample_num = z.shape[0]
    z_shape = list(z.shape[1:])
    g_input_shape = self.G.inputs[0].get_shape().as_list()[1:]
    if z_shape != g_input_shape:
      raise ValueError("Shape of input z {} doesn't match the shape of "
                        "generator's input {}".format(z_shape, g_input_shape))

    # Prepare labels for a conditional model
    if self._conditional:
      # Labels provided by the caller are already one-hot at this point
      if labels is None:
        labels = self._random_labels(sample_num)
      # z and labels must describe the same number of samples
      if labels.shape[0] != sample_num:
        raise ValueError('!! Provided z and labels should stand for same '
                         'number of samples but {} != {}'.format(
          sample_num, labels.shape[0]))

    # Assemble the feed dict and run the generator
    feed_dict = {self.G.inputs[0]: z}
    if self._conditional:
      feed_dict[self._targets] = labels
    feed_dict.update(self._get_status_feed_dict(is_training=False))

    return self._session.run(self._outputs, feed_dict=feed_dict)
Example #8
0
  def _random_labels(self, sample_num):
    """Draw `sample_num` random class indices and one-hot encode them.

    :param sample_num: number of labels to draw
    :return: result of `misc.convert_to_one_hot` on indices drawn uniformly
             from [0, self._classes)
    """
    # Random labels only make sense for a conditional model
    assert self._conditional
    num_classes = self._classes
    class_indices = np.random.randint(num_classes, size=sample_num)
    return misc.convert_to_one_hot(class_indices, num_classes)