@classmethod
def load_as_tframe_data(cls, data_dir):
  from .dataset import DataSet
  file_path = os.path.join(data_dir, cls.TFD_FILE_NAME)
  if os.path.exists(file_path): return DataSet.load(file_path)

  # If .tfd file does not exist, try to convert from raw data
  console.show_status('Trying to convert raw data to tframe DataSet ...')
  images, labels = cls.load_as_numpy_arrays(data_dir)
  data_set = DataSet(images, labels, name=cls.DATA_NAME, **cls.PROPERTIES)

  # Generate groups if necessary
  if data_set.num_classes is not None:
    groups = []
    dense_labels = misc.convert_to_dense_labels(labels)
    for i in range(data_set.num_classes):
      # Find samples of class i and append to groups
      samples = list(np.argwhere([j == i for j in dense_labels]).ravel())
      groups.append(samples)
    data_set.properties[data_set.GROUPS] = groups

  # Show status
  console.show_status('Successfully converted {} samples'.format(data_set.size))
  # Save DataSet
  console.show_status('Saving data set ...')
  data_set.save(file_path)
  console.show_status('Data set saved to {}'.format(file_path))
  return data_set
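
# A minimal call-site sketch for the load-or-convert pattern above. `MNIST`
# is a hypothetical agent subclass that would supply TFD_FILE_NAME, DATA_NAME,
# PROPERTIES and load_as_numpy_arrays; the path is illustrative. The first
# call converts raw data and caches a .tfd file; later calls hit the cache
# via DataSet.load.
data_set = MNIST.load_as_tframe_data('./data/mnist')
print('{}: {} samples, {} classes'.format(
  data_set.name, data_set.size, data_set.num_classes))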

def _load_data_set(file_name):
  assert isinstance(file_name, str)
  extension = file_name.split('.')[-1]
  if extension == DataSet.EXTENSION:
    return DataSet.load(file_name)
  elif extension == SignalSet.EXTENSION:
    return SignalSet.load(file_name)
  else:
    raise TypeError(
      '!! Can not load file with extension .{}'.format(extension))
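
# Usage sketch for the extension dispatcher. File names are illustrative;
# '.tfd' matches DataSet (see the viewer below), while SignalSet is assumed
# to define its own EXTENSION constant (e.g. '.tfds').
image_data = _load_data_set('mnist.tfd')      # dispatched to DataSet.load
signal_data = _load_data_set('speech.tfds')   # dispatched to SignalSet.load (assumed extension)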

def load_data_set(self, _):
  filename = filedialog.askopenfilename(
    initialdir=self.last_dir, title='Load data set',
    filetypes=(('TFData files', '*.tfd'),))
  if filename == '': return
  self.filename = filename
  self.set_data(DataSet.load(filename))
  self._update_title()
  # Print status
  print(">> Loaded data set '{}'".format(filename))
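
# The unused `_` parameter suggests this method is registered as a tkinter
# event callback. A hedged wiring sketch (the widget name is an assumption):
# bind() passes an event object, which the `_` argument absorbs.
self.master.bind('<Control-o>', self.load_data_set)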

def load_data(path, csv_path, fold=0):
  # TODO:
  train = pd.read_csv(csv_path)
  LABELS = list(train.label.unique())
  label_idx = {label: i for i, label in enumerate(LABELS)}
  train.set_index('fname', inplace=True)
  train['label_idx'] = train.label.apply(lambda x: label_idx[x])

  # Split into train/val indices (legacy sklearn API in which
  # StratifiedKFold takes the labels directly and is itself iterable)
  skf = StratifiedKFold(train.label_idx, n_folds=10)
  for i, (train_split, val_split) in enumerate(skf):
    if i == fold:
      train_split_0 = train_split
      val_split_0 = val_split
      break

  audio_length = 32000
  data_set = DataSet.load(path)
  assert isinstance(data_set, DataSet)
  train_split_data = Gpat_set.split_data_set(train_split_0, data_set)
  val_set = Gpat_set.split_data_set(val_split_0, data_set)
  # Note: raw_val_set aliases val_set, so both share this property
  raw_val_set = val_set
  raw_val_set.properties[raw_val_set.NUM_CLASSES] = 41
  train_set = Gpat_set(features=train_split_data.features,
                       targets=train_split_data.targets,
                       NUM_CLASSES=41)
  train_set.init_groups()

  # Pad/trim each validation sample to audio_length and extract MFCCs
  for i in range(len(val_set.features)):
    if i == 0:
      features = GPAT.length_adapted(val_set.features[i], audio_length)
      mfccs = librosa.feature.mfcc(features, 16000, n_mfcc=50)
      mfccs = np.expand_dims(mfccs, axis=0)
      features = np.reshape(features, (1, -1))
    else:
      feature = GPAT.length_adapted(val_set.features[i], audio_length)
      mfcc = librosa.feature.mfcc(feature, 16000, n_mfcc=50)
      mfcc = np.expand_dims(mfcc, axis=0)
      mfccs = np.concatenate((mfccs, mfcc), axis=0)
      feature = np.reshape(feature, (1, -1))
      features = np.concatenate((features, feature), axis=0)
  targets = val_set.targets
  features = np.expand_dims(features, axis=2)
  mfccs = np.expand_dims(mfccs, axis=-1)
  val_set = DataSet(features, targets, data_dict={'mfcc': mfccs})
  test_set = val_set
  return train_set, val_set, test_set, raw_val_set
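
# The extraction loop above re-concatenates the full feature/MFCC arrays on
# every iteration, which is quadratic in the number of samples. A sketch of
# the same extraction with O(n) list accumulation that could replace that
# loop (same assumed helpers: GPAT.length_adapted, 16 kHz mono waveforms):
feature_list, mfcc_list = [], []
for wave in val_set.features:
  wave = GPAT.length_adapted(wave, audio_length)
  feature_list.append(wave)
  mfcc_list.append(librosa.feature.mfcc(wave, 16000, n_mfcc=50))
features = np.expand_dims(np.stack(feature_list, axis=0), axis=2)
mfccs = np.expand_dims(np.stack(mfcc_list, axis=0), axis=-1)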

def set_data(self, data_set):
  if data_set is not None:
    # If a path is given
    if isinstance(data_set, six.string_types):
      data_set = DataSet.load(data_set)
    if not isinstance(data_set, DataSet):
      raise TypeError('!! Data set must be an instance of tframe DataSet')
    if not data_set.is_regular_array: data_set = data_set.stack
    self.data_set = data_set
    self._set_cursor(0)
    if self.data_set.targets is not None:
      self.labels = misc.convert_to_dense_labels(self.data_set.targets)
    console.show_status('Data set set to ImageViewer')
  # Refresh image viewer
  self.refresh()

@classmethod
def load_as_tframe_data(cls, data_dir, file_name=None, size=512, unique_=True):
  # Check file_name
  if file_name is None: file_name = cls._get_file_name(size, unique_)
  data_path = os.path.join(data_dir, file_name)
  if os.path.exists(data_path): return DataSet.load(data_path)

  # If data does not exist, create a new one
  console.show_status('Making data ...')
  erg_list = ReberGrammar.make_strings(
    size, unique_, embedded=True, verbose=True)

  # Wrap erg into a DataSet
  features = [erg.one_hot for erg in erg_list]
  targets = [erg.transfer_prob for erg in erg_list]
  data_set = DataSet(features, targets, {'erg_list': erg_list},
                     name='Embedded Reber Grammar')
  console.show_status('Saving data set ...')
  data_set.save(data_path)
  console.show_status('Data set saved to {}'.format(data_path))
  return data_set
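
# Call-site sketch; `ERG` is the assumed enclosing agent class. The first
# call generates 512 unique embedded Reber strings and saves them; later
# calls load the cached file.
data_set = ERG.load_as_tframe_data('./data/reber', size=512, unique_=True)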

def set_data(self, data_set):
  if data_set is not None:
    # If a path is given
    if isinstance(data_set, six.string_types):
      data_set = DataSet.load(data_set)
    if not isinstance(data_set, DataSet):
      raise TypeError('!! Data set must be an instance of tframe DataSet')
    if not data_set.is_regular_array: data_set = data_set.stack
    self.data_set = data_set
    self._set_cursor(0)

    # For DataSets like MNIST and CIFAR-XXX, recover dense labels
    if self.data_set.targets is not None:
      if len(self.data_set.targets.shape) == 1:
        self.labels = self.data_set.targets
      elif len(self.data_set.targets.shape) == 2:
        self.labels = misc.convert_to_dense_labels(
          self.data_set.targets).flatten()

    # Consider DataSets in image segmentation tasks
    interleave_key = self.kwargs.get('interleave_key', None)
    if interleave_key is not None:
      if interleave_key not in data_set.data_dict.keys():
        raise KeyError(
          '!! Can not find `{}` in DataSet'.format(interleave_key))
      # Interleave each feature with its shadow so the viewer shows
      # them alternately
      shadows = getattr(data_set, interleave_key)
      features = data_set.features
      assert shadows.shape == features.shape
      images = []
      for x, y in zip(features, shadows):
        images.append(np.reshape(x, (1,) + x.shape))
        images.append(np.reshape(y, (1,) + y.shape))
      data_set.features = np.concatenate(images, axis=0)

    console.show_status('Data set set to ImageViewer')
  # Refresh image viewer
  self.refresh()
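
# A hedged usage sketch of the interleaving branch: the viewer constructor
# and the 'shadow' key are assumptions, not tframe's documented API. The
# DataSet is expected to carry a data_dict entry shaped like its features.
viewer = ImageViewer(interleave_key='shadow')
viewer.set_data('segmentation.tfd')  # a path is accepted and loaded via DataSet.load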

@classmethod
def load_as_tframe_data(cls, data_dir, **kwargs):
  # Load directly if all files exist
  data_path = cls._get_data_paths(data_dir)
  if os.path.exists(data_path):
    data_set = DataSet.load(data_path)
  else:
    # If data does not exist, create from raw data
    console.show_status('Creating data sets ...')
    data, mapping = cls._load_raw_data(data_dir)
    # Inputs are characters data[:-1]; targets are the next characters data[1:]
    x = np.array(data[:-1]).reshape(-1, 1)
    y = np.array(data[1:]).reshape(-1, 1)
    data_set = DataSet(x, y, name='Text8.char', mapping=mapping)
    # Save data set and show info
    data_set.save(data_path)
    console.show_status('{} saved to `{}`'.format(data_set.name, data_path))

  # Show mapping size
  console.show_status(
    'Data sets (containing {} different characters) loaded:'.format(
      len(data_set['mapping'])))
  return data_set
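
# Call-site sketch; `Text8` is the assumed enclosing agent class. Since
# inputs and targets are shifted by one character, the resulting DataSet
# suits next-character prediction.
data_set = Text8.load_as_tframe_data('./data/text8')
print('Alphabet size: {}'.format(len(data_set['mapping'])))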