def wavLCollection2datXy(wavLabelCollection, fs=None, featExtFun=None):
    """
    Build a data object from a collection of labelled wav files.

    Each wav file contributes exactly one instance: the whole waveform is
    handed to the feature extraction function and the flattened output is
    stored as a single row, labelled with the label of the file
    (call type classification).

    Parameters
    ----------
    wavLabelCollection : iterable of pairs
        pair[0] : path to wav file
        pair[1] : wav label
    fs : optional
        sampling rate forwarded to `wav2waveform`
    featExtFun : callable
        takes a waveform and returns an array of features

    Return
    ------
    > datO : myML.dataXy_names
        data object with one instance per wav file
    """
    dataset = myML.dataXy_names()  # empty data object, filled below
    for wavPath, wavLabel in wavLabelCollection:
        # NOTE(review): `fs` is rebound here, so the value returned for one
        # file is the value passed in for the next one -- confirm this is
        # the intended way of keeping a common sampling rate.
        waveform, fs = wav2waveform(wavPath, fs=fs)
        features = featExtFun(waveform)
        # flatten and add a leading axis -> a single row of shape (1, n)
        featureRow = np.expand_dims(features.flatten(), axis=0)
        dataset.addInstances(featureRow, [wavLabel])
    return dataset
def wavAnn2annSecs_dataXy_names(wavF, annF, featExtFun=None):
    """
    Turn every annotated section of a wav file into one labelled instance.

    The annotated sections are obtained with `auf.getAnnWavSec`; for each
    of them the feature extraction function is applied to the section's
    waveform and the flattened result is stored as one row, labelled with
    the section's label. Used for call type classification.

    Parameters
    ----------
    wavF : str
        path to wav file
    annF : str
        path to annotation file
    featExtFun : callable
        feature extraction function, called with the waveform of a section

    Returns
    -------
    datO : myML.dataXy_names
        one instance per annotated section
    """
    # the second returned value (bound to `fs` originally) is not needed here
    sections, _ = auf.getAnnWavSec(wavF, annF)
    dataset = myML.dataXy_names()
    for section in sections:
        sectionLabel = section["label"]
        features = featExtFun(section["waveform"])
        # flatten and add a leading axis -> a single row of shape (1, n)
        featureRow = np.expand_dims(features.flatten(), axis=0)
        dataset.addInstances(featureRow, [np.array(sectionLabel)])
    return dataset
def wavAnnCollection2annSecs_dataXy_names(wavAnnColl, featExtFun=None):
    """
    Gather the instances of all annotated sections in a wav-ann collection.

    Calls `wavAnn2annSecs_dataXy_names` on every (wav, annotation) pair and
    appends the resulting X and y_names to one common data object.
    Used for call type classification.

    Parameters
    ----------
    < wavAnnColl : sliceable collection of pairs
        (path to wav file, path to annotation file)
    < featExtFun : callable
        feature extraction function, forwarded unchanged to
        `wavAnn2annSecs_dataXy_names`
        NOTE(review): earlier documentation also mentioned a settings
        dictionary; the callee calls this argument directly, so a callable
        appears to be required -- confirm.

    Return
    ------
    > datXy_names : myML.dataXy_names
        instances of all the files in the collection
    """
    collected = myML.dataXy_names()
    for wavPath, annPath in wavAnnColl[:]:
        fileData = wavAnn2annSecs_dataXy_names(wavPath, annPath,
                                               featExtFun=featExtFun)
        collected.addInstances(fileData.X, fileData.y_names)
    return collected
def extractFeaturesWDataAugmentation(sampSpace, feExFun, n_instances=10, **ensSettings):
    """
    Extract features per label, topping classes up with artificial samples.

    For every label in the sample space, features are first extracted from
    the original waveforms (through `waveformsLi2DatXy_names`, with
    nInstances=n_instances). The difference between `n_instances` and the
    number of instances obtained is then requested as artificial samples
    from `waveformsLi2aritificial_DatXy_names`.

    Parameter
    ---------
    sampSpace : dict
        label -> list of waveforms (sample space)
    feExFun : callable
        feature extraction function
    n_instances : int
        target number of instances per label
    ensSettings : dict
        kwargs for the generation of artificial samples,
        see exT.generateData_ensembleSettings(n_artificial_samples=1)

    Return
    ------
    > datO : myML.dataXy_names
        original and artificial instances of all labels
    """
    dataset = myML.dataXy_names()
    for callLabel, waveforms in sampSpace.items():
        # features of the original samples
        originals = waveformsLi2DatXy_names(waveforms, callLabel, feExFun,
                                            nInstances=n_instances)
        dataset.addInstances(originals.X, originals.y_names)
        # artificial samples make up for what is missing
        n_missing = n_instances - originals.m_instances
        artificial = waveformsLi2aritificial_DatXy_names(
            waveforms, callLabel, feExFun,
            n_instances=n_missing, **ensSettings)
        dataset.addInstances(artificial.X, artificial.y_names)
    return dataset
def wavAnnCollection2datXy(WavAnnCollection, feExFun=None, labelsHierarchy="default"):
    """
    Extract features and labels from a wav-ann collection.

    Every (wav, annotation) pair is passed to `getXy_fromWavFAnnF` and the
    returned features and label names are appended to one data object.

    Parameters
    ----------
    WavAnnCollection : list of tuples
        [(<path to wavF>, <path to annF>), ...]
    feExFun : callable
        feature extraction function
    labelsHierarchy : list
        labels in hierarchical order, used for setting the label of the
        instances; the string "default" is replaced by ["c"]

    Return
    ------
    > datO : myML.dataXy_names
        features and label names of all the files in the collection
    """
    hierarchy = ["c"] if labelsHierarchy == "default" else labelsHierarchy
    collected = myML.dataXy_names()
    for wavPath, annPath in WavAnnCollection:
        features, labelNames = getXy_fromWavFAnnF(wavPath, annPath,
                                                  feExFun, hierarchy)
        collected.addInstances(features, labelNames)
    return collected
def waveformsLi2aritificial_DatXy_names(waveformsLi, label, feExFun, n_instances, **ensemble_settings):
    """
    Generate artificial samples from equally labelled waveforms.

    The source waveforms are visited cyclically (0, 1, ..., len-1, 0, ...)
    until `n_instances` artificial waveforms have been produced. For each
    visit the first element returned by `eff.generateWaveformEnsemble` is
    taken, its features are extracted and stored as one row with `label`.

    Parameters
    ---------
    waveformsLi : list
        waveforms, all with the same label
    label :
        label given to every generated instance
    feExFun : callable
        feature extraction function
    n_instances : int
        total number of artificial samples (instances) to generate
    ensemble_settings : dict
        kwargs for the generation of artificial samples,
        see exT.generateData_ensembleSettings(n_artificial_samples=1)

    Return
    ------
    > datO : myML.dataXy_names
        the artificial instances
    """
    # cyclic indices into the list of source waveforms
    sourceIndices = np.arange(n_instances) % len(waveformsLi)
    dataset = myML.dataXy_names()
    for sourceIdx in sourceIndices:
        ensemble = eff.generateWaveformEnsemble(waveformsLi[sourceIdx],
                                                **ensemble_settings)
        features = feExFun(ensemble[0])  # only the first ensemble member is used
        featureRow = np.expand_dims(features.flatten(), axis=0)
        dataset.addInstances(featureRow, [np.array(label)])
    return dataset
def wavFAnnF2sections_wavsEnsemble_datXy_names(wavF, annF, featExtFun=None, wavPreprocessingT=None, ensembleSettings=None):
    """
    Compute features for an ensemble generated from each annotated section.

    Every annotated section of the wav file is preprocessed, expanded into
    an ensemble of waveforms with `eff.generateWaveformEnsemble`, and each
    member of the ensemble becomes one instance carrying the label of the
    section.

    Parameters:
    ----------
    wavF : str
        path to wav file
    annF : str
        path to annotation file
    featExtFun : callable
        feature extraction function; anything not callable is handed to
        `wavFeatureExtraction(...).featExtrFun()` to build one
    wavPreprocessingT : callable
        called as wavPreprocessingT(waveform, fs) before the ensemble
        generation; anything not callable is replaced by the identity
    ensembleSettings : dict
        kwargs for `eff.generateWaveformEnsemble`; None selects
        effectName="addWhiteNoise" with generate_data_grid=np.ones(1)

    Return:
    ------
    > datXy_names : myML.dataXy_names
        one instance per ensemble member of every annotated section
    """
    # resolve the defaults
    if not callable(featExtFun):  # dictionary or None (default parameters)
        featExtFun = wavFeatureExtraction(featExtFun).featExtrFun()
    if not callable(wavPreprocessingT):
        wavPreprocessingT = lambda waveform, _fs: waveform  # identity
    if ensembleSettings is None:
        ensembleSettings = dict(effectName="addWhiteNoise",
                                generate_data_grid=np.ones(1))

    sections, fs = auf.getAnnWavSec(wavF, annF)
    dataset = myML.dataXy_names()
    for section in sections:
        sectionLabel = section["label"]
        preprocessed = wavPreprocessingT(section["waveform"], fs)
        ensemble = eff.generateWaveformEnsemble(preprocessed, **ensembleSettings)
        # the two-index access is kept: the ensemble is expected to be 2-D
        for memberIdx in range(len(ensemble)):
            features = featExtFun(ensemble[memberIdx, :])
            featureRow = np.expand_dims(features.flatten(), axis=0)
            dataset.addInstances(featureRow, [np.array(sectionLabel)])
    return dataset
def test_dataXy_filter():
    """Check data loading and the None filter of myML.dataXy_names."""
    # loading: X must be stored unchanged
    features = np.random.randint(1, 5, (4, 4))
    labels = np.random.randint(0, 1, (4, ))
    dataset = myML.dataXy_names(features, labels)
    np.testing.assert_array_equal(features, dataset.X)

    # a None filter must hand back the features untouched
    # (the returned labels are not checked)
    filteredFeatures, _ = dataset.filterInstances(None)
    np.testing.assert_array_equal(features, filteredFeatures)
    # filtering
def waveformsLi2DatXy_names(waveformsLi, label, feExFun, nInstances):
    """
    Extract features from a list of equally labelled waveforms.

    Only the first `nInstances` waveforms are used when the list is longer
    than that; otherwise the whole list is used.

    Parameters
    ----------
    waveformsLi : list
        waveforms, all with the same label
    label :
        label given to every instance
    feExFun : callable
        feature extraction function
    nInstances : int
        maximum number of waveforms to take from the list

    Return
    ------
    > datO : myML.dataXy_names
        one instance per waveform used
    """
    # None as stop index means: take the whole list
    stop = nInstances if len(waveformsLi) > nInstances else None
    dataset = myML.dataXy_names()
    for waveform in waveformsLi[:stop]:
        features = feExFun(waveform)
        featureRow = np.expand_dims(features.flatten(), axis=0)
        dataset.addInstances(featureRow, [np.array(label)])
    return dataset
def wavAnnCollection2Xy_ensemble_datXy_names(wavAnnColl, featExtFun, wavPreprocessingT=None, ensembleSettings=None):
    """
    Gather the ensemble instances of all files in a wav-ann collection.

    Calls `wavFAnnF2sections_wavsEnsemble_datXy_names` on every
    (wav, annotation) pair and appends the resulting X and y_names to one
    common data object.

    Parameters
    ----------
    wavAnnColl : sliceable collection of pairs
        (path to wav file, path to annotation file)
    featExtFun, wavPreprocessingT, ensembleSettings :
        forwarded unchanged to `wavFAnnF2sections_wavsEnsemble_datXy_names`

    Return
    ------
    > datO : myML.dataXy_names
        instances of all the files in the collection
    """
    collected = myML.dataXy_names()
    for wavPath, annPath in wavAnnColl[:]:
        fileData = wavFAnnF2sections_wavsEnsemble_datXy_names(
            wavPath,
            annPath,
            featExtFun=featExtFun,
            wavPreprocessingT=wavPreprocessingT,
            ensembleSettings=ensembleSettings,
        )
        collected.addInstances(fileData.X, fileData.y_names)
    return collected
def get_DataXy_fromWavFannF(wavF, annF, feExFun, labelsHierarchy):
    """
    Extract the features of a wav file and label them with its annotations.

    The waveform is loaded with `wav2waveform`, the features are computed
    on the whole waveform and `auf.annotationsFi2instances` provides one
    label name per feature instance (ground truth).

    Parameters
    ----------
    wavF : str
        path to wav file
    annF : str
        path to annotation file
    feExFun : callable
        feature extraction function
    labelsHierarchy : list
        forwarded to `auf.annotationsFi2instances`

    Return
    ------
    > datO : myML.dataXy_names
        features and label names of the file
    """
    waveform, sampleRate = wav2waveform(wavF)
    # number of samples over sampling rate
    # (presumably the duration in seconds -- confirm the units of fs)
    duration = len(waveform) / sampleRate
    features = feExFun(waveform)
    n_featureInstances = len(features)
    labelNames = auf.annotationsFi2instances(annF, n_featureInstances, duration,
                                             labelsHierarchy=labelsHierarchy)
    return myML.dataXy_names(features, labelNames)
# In[18]: # path to files train_collection = os.path.join(pDir, 'data/groupB_paths2files.csv') ## load data df = pd.read_csv(train_collection, usecols=['path_to_file', 'call']) wavColl = df.values # ## Extract features # In[7]: datO = myML.dataXy_names() datO_new = fex.wavLCollection2datXy( wavColl, featExtFun=feExFun, fs=fs ) datO.addInstances(datO_new.X, datO_new.y_names ) ## label transformer call_labels = [l[1] for l in wavColl] lt = myML.labelTransformer(call_labels) X = datO.X y_names = datO.y_names y = lt.nom2num(y_names) # In[8]: