def set_data(self, paths):
    """Register the audio data, anomaly labels and grouping for this dataset.

    Parameters
    ----------
    paths : dict
        Expects 'data' (audio folder) and 'feat' (file-info save location).

    Returns
    -------
    self, so the call can be chained.
    """
    # Processing chain that reads raw wav samples when an item is accessed.
    wav_reader = ProcessingChain().add(WavDatareader())
    audio_seq = FolderDictSeqAbstract(
        paths['data'],
        file_info_save_path=paths['feat'],
        map_fct=wav_reader,
    )
    self.add('data', audio_seq)
    # Label keys are added after 'data' on purpose:
    # 'group' reads self['data']['subdb'], which must already exist.
    self.add('binary_anomaly', self._get_binary_anomaly(paths))
    self.add('group', self['data']['subdb'])
    return self
def set_data(self, paths):
    """Register the audio data, anomaly labels and grouping for this dataset.

    Parameters
    ----------
    paths : dict
        Expects 'data' (audio folder, also used as the save path).

    Returns
    -------
    self, so the call can be chained.
    """
    # Local import kept as in the original to avoid a module-level dependency.
    from dabstract.dataset.helpers import FolderDictSeqAbstract

    # Processing chain that reads wav samples when an item is accessed.
    reader = ProcessingChain().add(WavDatareader())
    audio_seq = FolderDictSeqAbstract(
        paths['data'],
        map_fct=reader,
        save_path=paths['data'],
    )
    self.add('data', audio_seq)
    # Eagerly evaluated label keys; 'group' reads the 'data' entry added above,
    # so the ordering of these add() calls matters.
    self.add('binary_anomaly', self._get_binary_anomaly(paths), lazy=False)
    self.add('group', self['data']['subdb'], lazy=False)
    return self
def set_data(self, paths):
    """Register the audio, read the meta file and add the derived label keys.

    Parameters
    ----------
    paths : dict
        Expects 'data' (audio folder), 'feat' (feature/file-info save root)
        and 'meta' (folder containing meta.txt / meta_dabstract.txt).

    Returns
    -------
    self, so the call can be chained.
    """
    # audio: chain that reads channel 0 of each wav file on access
    chain = ProcessingChain().add(WavDatareader(select_channel=0))
    from dabstract.dataset.helpers import FolderDictSeqAbstract
    self.add(
        "audio",
        FolderDictSeqAbstract(
            paths["data"],
            map_fct=chain,
            file_info_save_path=os.path.join(
                paths["feat"], self.__class__.__name__, "audio", "raw"
            ),
        ),
    )
    # get meta: prefer the previously re-sorted copy if it exists
    if os.path.exists(os.path.join(paths["meta"], "meta_dabstract.txt")):
        labels = pandas.read_csv(
            os.path.join(paths["meta"], "meta_dabstract.txt"),
            delimiter="\t",
            header=None,
        )
    else:
        labels = pandas.read_csv(
            os.path.join(paths["meta"], "meta.txt"),
            delimiter="\t",
            header=None,
        )
    # make sure audio and meta are aligned.
    # Build a filename -> row position lookup once (O(n)) instead of calling
    # list.index() per example (O(n^2) on large datasets).
    filenames = labels[0].to_list()
    position = {filename: k for k, filename in enumerate(filenames)}
    resort = np.array(
        [position["audio/" + filename] for filename in self["audio"]["example"]]
    )
    # header=None gives a RangeIndex, so label-based reindex here acts
    # positionally; persist the aligned meta for the fast path above.
    labels = labels.reindex(resort)
    labels.to_csv(
        os.path.join(paths["meta"], "meta_dabstract.txt"),
        sep="\t",
        header=False,
        index=False,
    )
    # add labels (columns: 0 = filename, 1 = scene, 2 = identifier — inferred
    # from usage here; confirm against the meta file format)
    self.add("identifier", labels[2].to_list(), lazy=False)
    self.add("scene", labels[1].to_list(), lazy=False)
    self.add("scene_id", stringlist2ind(self['scene']), lazy=False)
    self.add("group", stringlist2ind(self['identifier']), lazy=False)
    return self
def test_DataAbstract():
    """Exercise DataAbstract: multi-indexing, multiprocessing, generators and get().

    NOTE(review): relies on a module-level `import numpy as np` — confirm it
    exists at the top of this file.
    """
    from dabstract.abstract import DataAbstract, DictSeqAbstract, MapAbstract
    from dabstract.dataprocessor.processing_chain import Processor, ProcessingChain
    # check for multi-indexing on a List: int, negative int, slice, full slice
    data = ['1', '2', '3', '4']
    DA = DataAbstract(data)
    assert DA[0] == '1'
    assert DA[-1] == '4'
    assert DA[1:3] == ['2', '3']
    assert DA[:] == ['1', '2', '3', '4']
    # check for multiindexing on a DictSeqAbstract
    # (each indexed example comes back as a dict of key -> value)
    DSA = DictSeqAbstract().add_dict({
        'test1': ['1', '2', '3'],
        'test2': np.zeros(3)
    })
    DA = DataAbstract(DSA)
    assert DA[0] == {'test1': '1', 'test2': 0.0}
    assert DA[-1] == {'test1': '3', 'test2': 0.0}
    assert DA[0:2] == [{
        'test1': '1',
        'test2': 0.0
    }, {
        'test1': '2',
        'test2': 0.0
    }]
    assert DA[:] == [{
        'test1': '1',
        'test2': 0.0
    }, {
        'test1': '2',
        'test2': 0.0
    }, {
        'test1': '3',
        'test2': 0.0
    }]
    # check for multiindexing and multiprocessing on a DictSeqAbstract:
    # worker/buffer settings must not change the returned values
    DA = DataAbstract(DSA, workers=2, buffer_len=2)
    assert DA[0] == {'test1': '1', 'test2': 0.0}
    assert DA[-1] == {'test1': '3', 'test2': 0.0}
    assert DA[0:2] == [{
        'test1': '1',
        'test2': 0.0
    }, {
        'test1': '2',
        'test2': 0.0
    }]
    assert DA[:] == [{
        'test1': '1',
        'test2': 0.0
    }, {
        'test1': '2',
        'test2': 0.0
    }, {
        'test1': '3',
        'test2': 0.0
    }]
    # check output of the Generator (iterating DA yields the same examples)
    tmp = []
    for example in DA:
        tmp.append(example)
    assert tmp == [{
        'test1': '1',
        'test2': 0.0
    }, {
        'test1': '2',
        'test2': 0.0
    }, {
        'test1': '3',
        'test2': 0.0
    }]
    # check gets: __getitem__ and get() must agree on a mapped sequence
    data = [1, 2, 3, 4]

    class Something(Processor):
        # doubles the input and attaches {'test': 0} as info
        def process(self, data):
            return data * 2, {'test': 0}

    data_map = MapAbstract(data, ProcessingChain().add(Something()))
    DA = DataAbstract(data_map)
    assert DA[0] == DA.get(0)
    assert DA[-1] == DA.get(-1)
    assert np.all(DA[0:2] == np.array([[2.], [4.]]))
    assert np.all(DA[:] == np.array([[2.], [4.], [6.], [8.]]))
    assert np.all(DA.get([0, 1, 2]) == np.array([[2.], [4.], [6.]]))
    # check return info along with generators
    # (get(..., return_info=True) yields (value, info-dict) tuples)
    assert DA.get(0, return_info=True) == (2, {'test': 0, 'output_shape': ()})
    tmp = []
    for example in DA.get(return_generator=True, return_info=True):
        tmp.append(example)
    assert tmp == [(2, {
        'test': 0,
        'output_shape': ()
    }), (4, {
        'test': 0,
        'output_shape': ()
    }), (6, {
        'test': 0,
        'output_shape': ()
    }), (8, {
        'test': 0,
        'output_shape': ()
    })]
    tmp = []
    for example in DA.get([0, 2], return_generator=True, return_info=True):
        tmp.append(example)
    assert tmp == [(2, {
        'test': 0,
        'output_shape': ()
    }), (6, {
        'test': 0,
        'output_shape': ()
    })]
def test_Map():
    """Test Map with lambdas, named functions, ProcessingChains and extra info.

    NOTE(review): relies on module-level `np`, `Processor` and
    `ProcessingChain` imports — confirm they exist at the top of this file.
    """
    from dabstract.abstract import Map
    # data init
    data = [1, 2, 3, 4]
    ## Map using lambda function
    # eager mapping lambda function (applied immediately at construction)
    map_eager_data_lambda = Map(data, (lambda x: 2 * x), lazy=False)
    # lazy mapping lambda function (applied on item access)
    map_lazy_data_lambda = Map(data, (lambda x: 2 * x), lazy=True)
    # checks: eager and lazy must return the same values
    assert map_eager_data_lambda[0] == 2
    assert map_eager_data_lambda[-1] == 8
    assert map_lazy_data_lambda[0] == 2
    assert map_lazy_data_lambda[-1] == 8

    ## Map using defined function
    def some_function(input, multiplier, logarithm=False):
        output = input * multiplier
        if logarithm:
            output = np.log10(output)
        return output

    # eager mapping defined function (kwargs are forwarded to the function)
    map_eager_data_def = Map(data, some_function, multiplier=2, logarithm=True, lazy=False)
    # lazy mapping defined function
    map_lazy_data_def = Map(data, some_function, multiplier=2, logarithm=True, lazy=True)
    # checks: expected values are log10(2*x) for x in data
    assert map_eager_data_def[0] == 0.3010299956639812
    assert map_eager_data_def[-1] == 0.9030899869919435
    assert map_lazy_data_def[0] == 0.3010299956639812
    assert map_lazy_data_def[-1] == 0.9030899869919435

    ## Map using ProcessingChain
    class custom_processor(Processor):
        # adds 1 and emits {'multiplier': 3} into the chain's info
        def process(self, data, **kwargs):
            return data + 1, {'multiplier': 3}

    class custom_processor2(Processor):
        # consumes the 'multiplier' emitted by the previous processor
        def process(self, data, **kwargs):
            return data * kwargs['multiplier'], {}

    dp = ProcessingChain()
    dp.add(custom_processor)
    dp.add(custom_processor2)
    # eager mapping using processingchain
    map_eager_data_dp = Map(data, map_fct=dp, lazy=False)
    # lazy mapping using processingchain
    map_lazy_data_dp = Map(data, map_fct=dp, lazy=True)
    # checks: (x + 1) * 3 for x in data; info propagates through get()
    assert map_eager_data_dp[0] == 6
    assert map_eager_data_dp[-1] == 15
    assert map_lazy_data_dp[0] == 6
    assert map_lazy_data_dp[-1] == 15
    assert map_lazy_data_dp.get(-1, return_info=True) == (15, {
        'multiplier': 3,
        'output_shape': ()
    })
    ## Map using lambda function with additional information
    # eager mapping using lambda function and information
    # (one info dict per example, returned by get(..., return_info=True))
    map_eager_data_lambda_info = Map(data, (lambda x: 2 * x), info=({
        'test': 1
    }, {
        'test': 2
    }, {
        'test': 'a'
    }, {
        'test': 'b'
    }), lazy=False)
    # lazy mapping using lambda function and information
    map_lazy_data_lambda_info = Map(data, (lambda x: 2 * x), info=({
        'test': 1
    }, {
        'test': 2
    }, {
        'test': 'a'
    }, {
        'test': 'b'
    }), lazy=True)
    # checks
    assert map_eager_data_lambda_info[0] == 2
    assert map_eager_data_lambda_info[-1] == 8
    assert map_lazy_data_lambda_info.get(0, return_info=True) == (2, {
        'test': 1
    })
    assert map_lazy_data_lambda_info.get(-1, return_info=True) == (8, {
        'test': 'b'
    })