Example #1
    def load_as_tframe_data(cls, data_dir):
        from .dataset import DataSet
        file_path = os.path.join(data_dir, cls.TFD_FILE_NAME)
        if os.path.exists(file_path): return DataSet.load(file_path)

        # If .tfd file does not exist, try to convert from raw data
        console.show_status('Trying to convert raw data to tframe DataSet ...')
        images, labels = cls.load_as_numpy_arrays(data_dir)
        data_set = DataSet(images,
                           labels,
                           name=cls.DATA_NAME,
                           **cls.PROPERTIES)

        # Generate groups if necessary
        if data_set.num_classes is not None:
            groups = []
            dense_labels = misc.convert_to_dense_labels(labels)
            for i in range(data_set.num_classes):
                # Find samples of class i and append to groups
                samples = list(
                    np.argwhere([j == i for j in dense_labels]).ravel())
                groups.append(samples)
            data_set.properties[data_set.GROUPS] = groups

        # Show status
        console.show_status('Successfully converted {} samples'.format(
            data_set.size))
        # Save DataSet
        console.show_status('Saving data set ...')
        data_set.save(file_path)
        console.show_status('Data set saved to {}'.format(file_path))
        return data_set
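A minimal call sketch for the loader above (hedged: MNIST is an assumed concrete subclass that defines TFD_FILE_NAME, DATA_NAME and PROPERTIES):

    # MNIST here is a hypothetical concrete agent; any subclass providing
    # the class attributes used above behaves the same way
    data_set = MNIST.load_as_tframe_data('./data/mnist')
    print(data_set.size, data_set.num_classes)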
Example #2
 def _load_data_set(file_name):
     assert isinstance(file_name, str)
     extension = file_name.split('.')[-1]
     if extension == DataSet.EXTENSION:
         return DataSet.load(file_name)
     elif extension == SignalSet.EXTENSION:
         return SignalSet.load(file_name)
     else:
         raise TypeError(
             '!! Can not load file with extension .{}'.format(extension))
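Usage reduces to a dispatch on the file suffix (file names are placeholders; '.tfd' matches DataSet.EXTENSION as seen in the other examples, while the SignalSet suffix is an assumption):

    data_set = _load_data_set('mnist.tfd')      # routed to DataSet.load
    signal_set = _load_data_set('speech.sigs')  # routed to SignalSet.load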
Example #3
  def load_data_set(self, _):
    filename = filedialog.askopenfilename(
      initialdir=self.last_dir, title='Load data set',
      filetypes=(("TFData files", '*.tfd'),))
    if filename == '':
      return

    self.filename = filename
    self.set_data(DataSet.load(filename))
    self._update_title()

    # Print status
    print(">> Loaded data set '{}'".format(filename))
Example #4
def load_data(path, csv_path, fold=0):
    # Read the training csv and build a label-to-index mapping
    train = pd.read_csv(csv_path)
    LABELS = list(train.label.unique())
    label_idx = {label: i for i, label in enumerate(LABELS)}
    train.set_index("fname", inplace=True)
    train["label_idx"] = train.label.apply(lambda x: label_idx[x])
    # split the train_set and the val_set
    # Stratified 10-fold split over class labels; keep the requested fold
    skf = StratifiedKFold(n_splits=10)
    for i, (train_split, val_split) in enumerate(
            skf.split(train, train.label_idx)):
        if i == fold:
            train_split_0 = train_split
            val_split_0 = val_split
            break
    audio_length = 32000
    data_set = DataSet.load(path)
    assert isinstance(data_set, DataSet)

    train_split_data = Gpat_set.split_data_set(train_split_0, data_set)
    val_set = Gpat_set.split_data_set(val_split_0, data_set)
    raw_val_set = val_set
    raw_val_set.properties[raw_val_set.NUM_CLASSES] = 41
    train_set = Gpat_set(features=train_split_data.features,
                         targets=train_split_data.targets,
                         NUM_CLASSES=41)

    train_set.init_groups()
    # Length-adapt each validation clip and compute its MFCC features
    feature_list, mfcc_list = [], []
    for i in range(len(val_set.features)):
        feature = GPAT.length_adapted(val_set.features[i], audio_length)
        mfcc = librosa.feature.mfcc(y=feature, sr=16000, n_mfcc=50)
        mfcc_list.append(np.expand_dims(mfcc, axis=0))
        feature_list.append(np.reshape(feature, (1, -1)))
    mfccs = np.concatenate(mfcc_list, axis=0)
    features = np.concatenate(feature_list, axis=0)
    targets = val_set.targets
    features = np.expand_dims(features, axis=2)
    mfccs = np.expand_dims(mfccs, axis=-1)
    val_set = DataSet(features, targets, data_dict={'mfcc': mfccs})
    test_set = val_set
    return train_set, val_set, test_set, raw_val_set
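A call sketch for this loader (paths are placeholders; Gpat_set, GPAT and the saved .tfd file come from the surrounding project):

    train_set, val_set, test_set, raw_val_set = load_data(
        './data/audio.tfd', './data/train.csv', fold=0)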
Example #5
    def set_data(self, data_set):
        if data_set is not None:
            # If a path is given
            if isinstance(data_set, six.string_types):
                data_set = DataSet.load(data_set)
            if not isinstance(data_set, DataSet):
                raise TypeError(
                    '!! Data set must be an instance of tframe DataSet')
            if not data_set.is_regular_array:
                data_set = data_set.stack
            self.data_set = data_set
            self._set_cursor(0)
            if self.data_set.targets is not None:
                self.labels = misc.convert_to_dense_labels(
                    self.data_set.targets)
            console.show_status('Data set set to ImageViewer')

            # Refresh image viewer
            self.refresh()
Example #6
  def load_as_tframe_data(cls, data_dir, file_name=None, size=512,
                          unique_=True):
    # Check file_name
    if file_name is None: file_name = cls._get_file_name(size, unique_)
    data_path = os.path.join(data_dir, file_name)
    if os.path.exists(data_path): return DataSet.load(data_path)
    # If data does not exist, create a new one
    console.show_status('Making data ...')
    erg_list = ReberGrammar.make_strings(
      size, unique_, embedded=True, verbose=True)

    # Wrap erg into a DataSet
    features = [erg.one_hot for erg in erg_list]
    targets = [erg.transfer_prob for erg in erg_list]
    data_set = DataSet(features, targets, {'erg_list': erg_list},
                       name='Embedded Reber Grammar')
    console.show_status('Saving data set ...')
    data_set.save(data_path)
    console.show_status('Data set saved to {}'.format(data_path))
    return data_set
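A call sketch (hedged: ERG is an assumed name for the enclosing agent class; the default file name is derived from size and unique_):

    # Builds 256 unique embedded Reber strings on the first call,
    # then reloads the saved .tfd file on subsequent calls
    data_set = ERG.load_as_tframe_data('./data/erg', size=256, unique_=True)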
Example #7
    def set_data(self, data_set):
        if data_set is not None:
            # If a path is given
            if isinstance(data_set, six.string_types):
                data_set = DataSet.load(data_set)
            if not isinstance(data_set, DataSet):
                raise TypeError(
                    '!! Data set must be an instance of tframe DataSet')
            if not data_set.is_regular_array:
                data_set = data_set.stack
            self.data_set = data_set
            self._set_cursor(0)
            # For DataSet like MNIST and CIFAR-XXX
            if self.data_set.targets is not None:
                if len(self.data_set.targets.shape) == 1:
                    self.labels = self.data_set.targets
                elif len(self.data_set.targets.shape) == 2:
                    self.labels = misc.convert_to_dense_labels(
                        self.data_set.targets).flatten()
            # Consider DataSets in image segmentation tasks
            interleave_key = self.kwargs.get('interleave_key', None)
            if interleave_key is not None:
                if interleave_key not in data_set.data_dict:
                    raise KeyError('!! Can not find `{}` in DataSet'.format(
                        interleave_key))
                else:
                    shadows = getattr(data_set, interleave_key)
                    features = data_set.features
                    assert shadows.shape == features.shape
                    images = []
                    for x, y in zip(features, shadows):
                        images.append(np.reshape(x, (1, ) + x.shape))
                        images.append(np.reshape(y, (1, ) + y.shape))
                    data_set.features = np.concatenate(images, axis=0)

            console.show_status('Data set set to ImageViewer')

            # Refresh image viewer
            self.refresh()
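The interleave_key branch expects a tensor in data_dict with exactly the same shape as features; a sketch of preparing such a set (the 'shadows' key and the viewer construction are illustrative assumptions):

    # masks must have the same shape as images for the interleaving to work
    data_set = DataSet(images, data_dict={'shadows': masks})
    viewer = ImageViewer(interleave_key='shadows')  # kwarg name assumed
    viewer.set_data(data_set)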
Example #8
    def load_as_tframe_data(cls, data_dir, **kwargs):

        # Load directly if the data file already exists
        data_path = cls._get_data_paths(data_dir)
        if os.path.exists(data_path):
            data_set = DataSet.load(data_path)
        else:
            # If data does not exist, create from raw data
            console.show_status('Creating data sets ...')
            data, mapping = cls._load_raw_data(data_dir)
            x = np.array(data[:-1]).reshape(-1, 1)
            y = np.array(data[1:]).reshape(-1, 1)
            data_set = DataSet(x, y, name='Text8.char', mapping=mapping)
            # Save data set and show info
            data_set.save(data_path)
            console.show_status('{} saved to `{}`'.format(
                data_set.name, data_path))

        # Show mapping size
        console.show_status(
            'Data sets (containing {} different characters) loaded:'.format(
                len(data_set['mapping'])))

        return data_set
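The x/y construction above is the standard next-character objective: targets are the inputs shifted by one step. A tiny self-contained illustration:

    import numpy as np
    data = [ord(c) for c in 'hello']
    x = np.array(data[:-1]).reshape(-1, 1)  # h, e, l, l
    y = np.array(data[1:]).reshape(-1, 1)   # e, l, l, o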