Example #1
    # Assumed imports for this snippet: os, numpy as np; `console` and
    # `misc` come from the tframe package.
    @classmethod
    def load_as_tframe_data(cls, data_dir):
        from .dataset import DataSet
        # Load the cached .tfd file directly if it already exists
        file_path = os.path.join(data_dir, cls.TFD_FILE_NAME)
        if os.path.exists(file_path): return DataSet.load(file_path)

        # If the .tfd file does not exist, convert from the raw data instead
        console.show_status('Trying to convert raw data to tframe DataSet ...')
        images, labels = cls.load_as_numpy_arrays(data_dir)
        data_set = DataSet(images,
                           labels,
                           name=cls.DATA_NAME,
                           **cls.PROPERTIES)

        # Generate groups if necessary
        if data_set.num_classes is not None:
            groups = []
            dense_labels = misc.convert_to_dense_labels(labels)
            for i in range(data_set.num_classes):
                # Find samples of class i and append to groups
                samples = list(
                    np.argwhere([j == i for j in dense_labels]).ravel())
                groups.append(samples)
            data_set.properties[data_set.GROUPS] = groups

        # Show status
        console.show_status('Successfully converted {} samples'.format(
            data_set.size))
        # Save DataSet
        console.show_status('Saving data set ...')
        data_set.save(file_path)
        console.show_status('Data set saved to {}'.format(file_path))
        return data_set
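
Examples #1 through #3 share the same cache-or-build pattern: load the saved file when it exists, otherwise build the DataSet from raw data, save it, and return it. A minimal call-site sketch for Example #1, assuming a hypothetical subclass MNIST that defines TFD_FILE_NAME, DATA_NAME and PROPERTIES:

    data_set = MNIST.load_as_tframe_data('./data/mnist')
    print(data_set.size)                              # number of samples
    print(len(data_set.properties[data_set.GROUPS]))  # one index list per class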
Example #2
  # Assumed imports for this snippet: os; `console` from tframe; DataSet
  # and ReberGrammar from the surrounding project.
  @classmethod
  def load_as_tframe_data(cls, data_dir, file_name=None, size=512,
                          unique_=True):
    # Derive the default file name if none is given
    if file_name is None: file_name = cls._get_file_name(size, unique_)
    data_path = os.path.join(data_dir, file_name)
    if os.path.exists(data_path): return DataSet.load(data_path)
    # If data does not exist, create a new one
    console.show_status('Making data ...')
    erg_list = ReberGrammar.make_strings(
      size, unique_, embedded=True, verbose=True)

    # Wrap erg into a DataSet
    features = [erg.one_hot for erg in erg_list]
    targets = [erg.transfer_prob for erg in erg_list]
    data_set = DataSet(features, targets, {'erg_list': erg_list},
                       name='Embedded Reber Grammar')
    console.show_status('Saving data set ...')
    data_set.save(data_path)
    console.show_status('Data set saved to {}'.format(data_path))
    return data_set
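
A hedged usage sketch for Example #2 (the owning class name ERG is assumed for illustration; size and unique_ mirror the signature):

    train_set = ERG.load_as_tframe_data('./data', size=256, unique_=True)
    erg_list = train_set['erg_list']  # raw ReberGrammar objects from the data_dict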
Example #3
    # Assumed imports for this snippet: os, numpy as np; `console` from
    # tframe; DataSet from the surrounding project.
    @classmethod
    def load_as_tframe_data(cls, data_dir, **kwargs):
        # Load directly if the data file exists
        data_path = cls._get_data_paths(data_dir)
        if os.path.exists(data_path):
            data_set = DataSet.load(data_path)
        else:
            # If data does not exist, create from raw data
            console.show_status('Creating data sets ...')
            data, mapping = cls._load_raw_data(data_dir)
            # Pair each character with the next one as its target
            x = np.array(data[:-1]).reshape(-1, 1)
            y = np.array(data[1:]).reshape(-1, 1)
            data_set = DataSet(x, y, name='Text8.char', mapping=mapping)
            # Save data set and show info
            data_set.save(data_path)
            console.show_status('{} saved to `{}`'.format(
                data_set.name, data_path))

        # Show mapping size
        console.show_status(
            'Data sets (containing {} different characters) loaded:'.format(
                len(data_set['mapping'])))

        return data_set
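
The x/y construction in Example #3 pairs every character with its successor, which is exactly what a next-character language model trains on. A tiny self-contained NumPy illustration of the same slicing:

    import numpy as np
    data = [7, 2, 5, 9]                     # toy character IDs
    x = np.array(data[:-1]).reshape(-1, 1)  # [[7], [2], [5]] -- inputs
    y = np.array(data[1:]).reshape(-1, 1)   # [[2], [5], [9]] -- next-char targets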
Example #4
# Assumed context: numpy as np; DataGenerator, DataSet, config, train and
# val_set come from the surrounding project.
data_generator = DataGenerator(config=config,
                               data_dir='../data/original_data/audio_train/',
                               list_IDs=train.index,
                               labels=train["label_idx"])
# Collect batches in lists and concatenate once at the end, instead of
# growing the arrays inside the loop
batch_size = 64
batches = len(train.index) // batch_size
feature_list, target_list = [], []
for i in range(batches):
    feature, target = data_generator[i]
    feature_list.append(feature)
    target_list.append(target)
features = np.concatenate(feature_list, axis=0)
targets = np.concatenate(target_list, axis=0)

demo_data = DataSet(features=features, targets=targets)
demo_data.save('../data/processed_data/demo_data_0')
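
The saved demo set can presumably be reloaded later with the same DataSet.load used by the loaders above (assuming save writes to the given path unchanged):

    demo_data = DataSet.load('../data/processed_data/demo_data_0')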
# Ad-hoc inspection of a single batch from the generator
a = data_generator[2]
b = a[0]
# Adapt every validation clip to `audio_length` and stack the rows into a
# single array (GPAT.length_adapted normalizes one clip to that length)
feature_rows = []
for i in range(len(val_set.features)):
    feature = GPAT.length_adapted(val_set.features[i], audio_length)
    feature_rows.append(np.reshape(feature, (1, -1)))
features = np.concatenate(feature_rows, axis=0)
targets = val_set.targets
# Append a trailing channel axis: (num_clips, audio_length, 1)
features = np.expand_dims(features, axis=2)
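
After the loop, features has shape (num_clips, audio_length); expand_dims appends a channel axis so channel-last 1-D convolution layers can consume it. A self-contained shape check with dummy data:

    import numpy as np
    rows = [np.arange(4).reshape(1, -1) for _ in range(3)]
    features = np.concatenate(rows, axis=0)      # shape (3, 4)
    features = np.expand_dims(features, axis=2)  # shape (3, 4, 1)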