import os

import numpy as np

from tframe import console
from tframe.utils import misc


@classmethod
def load_as_tframe_data(cls, data_dir):
  from .dataset import DataSet

  # Load directly if the .tfd file already exists
  file_path = os.path.join(data_dir, cls.TFD_FILE_NAME)
  if os.path.exists(file_path): return DataSet.load(file_path)

  # If .tfd file does not exist, try to convert from raw data
  console.show_status('Trying to convert raw data to tframe DataSet ...')
  images, labels = cls.load_as_numpy_arrays(data_dir)
  data_set = DataSet(images, labels, name=cls.DATA_NAME, **cls.PROPERTIES)

  # Generate groups if necessary
  if data_set.num_classes is not None:
    groups = []
    dense_labels = misc.convert_to_dense_labels(labels)
    for i in range(data_set.num_classes):
      # Find samples of class i and append to groups
      samples = list(np.argwhere([j == i for j in dense_labels]).ravel())
      groups.append(samples)
    data_set.properties[data_set.GROUPS] = groups

  # Show status
  console.show_status('Successfully converted {} samples'.format(
    data_set.size))
  # Save DataSet
  console.show_status('Saving data set ...')
  data_set.save(file_path)
  console.show_status('Data set saved to {}'.format(file_path))
  return data_set
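# A minimal usage sketch, assuming an Agent subclass (the name `MNIST` and
# the import path are hypothetical) that provides TFD_FILE_NAME, DATA_NAME,
# PROPERTIES and load_as_numpy_arrays. The first call converts the raw data
# and caches a .tfd file; later calls load the cache directly.
from tframe.data.images.mnist import MNIST  # hypothetical path

data_set = MNIST.load_as_tframe_data('./data/mnist')
print('Loaded {} samples in {} classes'.format(
  data_set.size, data_set.num_classes))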
# (os, np, console and DataSet are imported as above; ReberGrammar is
# assumed to come from the surrounding module)
@classmethod
def load_as_tframe_data(cls, data_dir, file_name=None, size=512,
                        unique_=True):
  # Check file_name
  if file_name is None: file_name = cls._get_file_name(size, unique_)
  data_path = os.path.join(data_dir, file_name)
  if os.path.exists(data_path): return DataSet.load(data_path)

  # If data does not exist, create a new one
  console.show_status('Making data ...')
  erg_list = ReberGrammar.make_strings(
    size, unique_, embedded=True, verbose=True)

  # Wrap erg_list into a DataSet
  features = [erg.one_hot for erg in erg_list]
  targets = [erg.transfer_prob for erg in erg_list]
  data_set = DataSet(features, targets, {'erg_list': erg_list},
                     name='Embedded Reber Grammar')
  console.show_status('Saving data set ...')
  data_set.save(data_path)
  console.show_status('Data set saved to {}'.format(data_path))
  return data_set
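# A minimal usage sketch, assuming the method above belongs to an agent
# class here called `ERG` (hypothetical name). It generates, or loads from
# cache, 512 unique embedded Reber grammar strings whose features are
# one-hot symbol sequences and whose targets are transfer probabilities.
erg_set = ERG.load_as_tframe_data('./data/erg', size=512, unique_=True)
print('Loaded {} sequences'.format(erg_set.size))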
@classmethod
def load_as_tframe_data(cls, data_dir, **kwargs):
  # Load directly if the data file already exists
  data_path = cls._get_data_paths(data_dir)
  if os.path.exists(data_path):
    data_set = DataSet.load(data_path)
  else:
    # If data does not exist, create from raw data
    console.show_status('Creating data sets ...')
    data, mapping = cls._load_raw_data(data_dir)
    # Each character is used to predict the one that follows it
    x = np.array(data[:-1]).reshape(-1, 1)
    y = np.array(data[1:]).reshape(-1, 1)
    data_set = DataSet(x, y, name='Text8.char', mapping=mapping)
    # Save data set and show info
    data_set.save(data_path)
    console.show_status('{} saved to `{}`'.format(data_set.name, data_path))
  # Show mapping size
  console.show_status(
    'Data sets (containing {} different characters) loaded:'.format(
      len(data_set['mapping'])))
  return data_set
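# A minimal usage sketch, with `Text8` as a hypothetical name for the agent
# above. Since y is x shifted by one position, the resulting DataSet poses
# a next-character prediction task over the text8 corpus.
text8_set = Text8.load_as_tframe_data('./data/text8')
print('Alphabet size: {}'.format(len(text8_set['mapping'])))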
# Build a demo DataSet by concatenating the batches produced by an
# index-based data generator (assumed batch size: 64)
data_generator = DataGenerator(
  config=config, data_dir='../data/original_data/audio_train/',
  list_IDs=train.index, labels=train["label_idx"])
batches = len(train.index) // 64
for i in range(batches):
  feature, target = data_generator[i]
  if i == 0:
    features, targets = feature, target
  else:
    features = np.concatenate((features, feature), axis=0)
    targets = np.concatenate((targets, target), axis=0)
demo_data = DataSet(features=features, targets=targets)
demo_data.save('../data/processed_data/demo_data_0')

# Pad or truncate every validation sample to audio_length and stack them
for i in range(len(val_set.features)):
  if i == 0:
    features = GPAT.length_adapted(val_set.features[i], audio_length)
    features = np.reshape(features, (1, -1))
  else:
    feature = GPAT.length_adapted(val_set.features[i], audio_length)
    feature = np.reshape(feature, (1, -1))
    features = np.concatenate((features, feature), axis=0)
targets = val_set.targets
# Add a trailing channel axis: (num_samples, audio_length, 1)
features = np.expand_dims(features, axis=2)
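# The same stacking step, sketched without repeated np.concatenate calls:
# collecting the adapted samples in a list and concatenating once copies the
# data O(n) times instead of O(n^2). GPAT.length_adapted, val_set and
# audio_length are the names used above.
adapted = [np.reshape(GPAT.length_adapted(f, audio_length), (1, -1))
           for f in val_set.features]
features = np.expand_dims(np.concatenate(adapted, axis=0), axis=2)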