예제 #1
0
def wavLCollection2datXy(wavLabelCollection, fs=None, featExtFun=None):
    """
    Build a data object from a collection of labelled wav files.

    Each wav file is loaded, passed through ``featExtFun``, and the
    flattened feature array is stored as a single instance together with
    its label (call-type classification setting).

    Parameters
    ----------
    wavLabelCollection : iterable of 2-tuples
        tu[0] : path to wav file
        tu[1] : wav label
    fs : optional
        Forwarded to ``wav2waveform`` as ``fs``. It is rebound to the value
        returned by ``wav2waveform`` on every iteration, so the value
        obtained for one file is the one forwarded when loading the next.
    featExtFun : callable
        Feature extraction function applied to each waveform. Although the
        default is None, a callable is required as soon as the collection
        is non-empty.

    Returns
    -------
    datO : myML.dataXy_names
        Data object holding one flattened feature row per wav file.
    """
    dataset = myML.dataXy_names()  # start from an empty data object

    for wavPath, label in wavLabelCollection:
        signal, fs = wav2waveform(wavPath, fs=fs)
        features = featExtFun(signal)
        # one instance per file: flatten, then add a leading axis of size 1
        flatFeatures = features.flatten()
        instanceRow = np.expand_dims(flatFeatures, axis=0)
        dataset.addInstances(instanceRow, [label])

    return dataset
예제 #2
0
def WSD2predictAnnotations(wavF,
                           annWSD1,
                           feExtFun,
                           lt,
                           WSD2_clf,
                           outF,
                           readSections,
                           keepSections='default',
                           dt=0):
    """Generate annotations using the WSD2 classifier.

    Reads the sections predicted by WSD1 and, for each section of
    interest, predicts the finer structure of the calls with a classifier
    trained with a smaller nTextWS. The selected predictions are appended
    to the output annotation file.

    Parameters
    ----------
    wavF: str
        wavefile name
    annWSD1: str
        annotations produced by WSD1, loaded with ``annT.anns2array``
    feExtFun: callable
        feature extraction function
    lt: LabelEncoder
        label transformation object
    WSD2_clf: estimator
        model for estimating predictions
    outF: str
        name of the output annotations; an existing file with this name is
        removed first
    readSections: list like object
        labels of the WSD1 annotation sections we want to reinterpret
    keepSections: list like object
        labels of the predicted sections we want to write,
        default = ['c']
    dt: float
        time buffer for reading around the desired annotation section

    Returns
    -------
    outF:
        the initial ``outF`` when no section was processed, otherwise the
        value returned by the last ``annT.save_TLannotations`` call
    """
    # Compare by value: ``is 'default'`` tested object identity, which is
    # not guaranteed for equal strings. The isinstance guard keeps
    # array-like arguments away from an elementwise ``==``.
    if isinstance(keepSections, str) and keepSections == 'default':
        keepSections = ['c']

    # start from a clean output file; a missing file is not an error
    try:
        os.remove(outF)
    except OSError:
        pass

    waveform, fs = sT.wav2waveform(wavF)  # load waveform
    A = annT.anns2array(annWSD1)  # load annotations
    for t0i, t0f, l0 in A[:]:  # for each ann section
        if l0 not in readSections:  # only sections of interest
            continue
        thisWaveform = auf.getWavSec(waveform, fs, t0i - dt, t0f + dt)
        # predict annotations inside this section
        T, L = predictAnnotations(thisWaveform, fs, feExtFun, lt, WSD2_clf)
        # shift times so they are relative to the original recording
        newT = T + t0i - dt
        mask = np.in1d(L, keepSections)
        # the returned value is used as the output path from here on
        outF = annT.save_TLannotations(newT[mask, :],
                                       L[mask],
                                       outF,
                                       opening_mode='a')
    return outF
예제 #3
0
def WSD2predictionsTLanns(wavF,
                          annWSD1,
                          feExtFun,
                          lt,
                          WSD2_clf,
                          readSections,
                          dt=0):  #keepSections=None
    """Generate annotations (times and labels) using the WSD2 classifier.

    Reads the sections predicted by WSD1 and, for each section of
    interest, predicts the finer structure of the calls with a classifier
    trained with a smaller nTextWS. Unlike ``WSD2predictAnnotations``,
    nothing is written to disk: the stacked predictions are returned.

    Parameters
    ----------
    wavF: str
        wavefile name
    annWSD1: str
        annotations produced by WSD1, loaded with ``annT.anns2array``
    feExtFun: callable
        feature extraction function
    lt: LabelEncoder
        label transformation object
    WSD2_clf: estimator
        model for estimating predictions
    readSections: list like object
        labels of the WSD1 annotation sections we want to reinterpret
    dt: float
        time buffer for reading around the desired annotation section
    keepSections: (DEPRECATED) list like object
        no longer part of the signature

    Returns
    -------
    newT: ndarray
        predicted section times, stacked row-wise and shifted so they are
        relative to the original recording
    newL: ndarray
        predicted labels, concatenated in the same order as ``newT``
    """
    waveform, fs = sT.wav2waveform(wavF)  # load waveform
    A = annT.anns2array(annWSD1)  # load annotations

    newT_list = []
    newL_list = []
    for t0i, t0f, l0 in A[:]:  # for each ann section
        if l0 not in readSections:  # only sections of interest
            continue
        thisWaveform = auf.getWavSec(waveform, fs, t0i - dt, t0f + dt)
        # predict annotations inside this section
        secT, secL = predictAnnotations(thisWaveform, fs, feExtFun, lt,
                                        WSD2_clf)
        # shift times so they are relative to the original recording
        newT_list.append(secT + t0i - dt)
        newL_list.append(secL)

    if not newT_list:
        # np.hstack / np.vstack raise ValueError on an empty sequence, so
        # return empty arrays when no annotation matched readSections.
        # NOTE(review): shape (0, 2) assumes each row of T is a
        # (start, end) pair and that labels are strings -- confirm against
        # predictAnnotations.
        return np.empty((0, 2)), np.array([], dtype=str)

    newL = np.hstack(newL_list)
    newT = np.vstack(newT_list)
    return newT, newL
예제 #4
0
def TLpredictAnnotationSections(wavF,
                                annF,
                                clf,
                                featExtFun,
                                lt,
                                printProbs=False,
                                readSections=None,
                                printreadSectionsC=True):
    """Predict the class of each selected annotation section.

    Parameters
    ----------
    wavF : str
        path to the wav file
    annF : str
        path to the file with the annotation sections to predict
    clf : estimator
        classifier; ``clf.predict`` is called with one flattened feature
        row per section
    featExtFun : callable
        feature extraction function applied to each waveform section
    lt : labelTransformer
        ``lt.num2nom`` converts the classifier output into label names
    printProbs : bool
        currently unused by this function
    readSections : list of str
        labels of the regions in annF for which we predict; by default
        every label present in annF
    printreadSectionsC : bool
        currently unused by this function

    Returns
    -------
    Tp : ndarray
        times of the annotation sections that were predicted
    Lp : ndarray
        predicted label for each row of ``Tp``
    """
    ## load waveform and annotations
    waveform, fs = sT.wav2waveform(wavF)
    T, L0 = annT.anns2TLndarrays(annF)
    ## set of annotation-sections to predict
    if readSections is None:
        readSections = np.array(list(set(L0)))
    ## filter for sections of interest
    IO_sections = np.isin(L0, readSections)
    Tp = T[IO_sections]
    L = L0[IO_sections]

    ## predict each annotation section
    predictions = []
    for bounds in Tp:
        waveformSec = auf.getWavSec(waveform, fs, *bounds)  # section audio
        M0 = featExtFun(waveformSec)  # extract features
        M = np.expand_dims(M0.flatten(), axis=0)
        predictions.append(lt.num2nom(clf.predict(M))[0])

    if not predictions:
        return Tp, np.zeros_like(L)

    # Writing into np.zeros_like(L) would reuse the item size of the input
    # labels: with a fixed-width string dtype, a predicted label longer
    # than the longest input label is silently truncated. Let numpy size
    # string arrays from the predictions instead, keeping the same kind.
    kind = L.dtype.kind
    Lp = np.array(predictions, dtype=kind if kind in ('U', 'S') else L.dtype)

    return Tp, Lp
예제 #5
0
def get_DataXy_fromWavFannF(wavF, annF, feExFun, labelsHierarchy):
    """
    Extract features from a wav file and pair them with the ground-truth
    labels of its annotation file, returned as a dataXy_names instance.

    Parameters
    ----------
    wavF: str
        path to the wav file
    annF: str
        path to the annotation file
    feExFun: callable
        feature extraction function applied to the whole waveform
    labelsHierarchy: list
        forwarded to ``auf.annotationsFi2instances``

    Returns
    -------
    myML.dataXy_names
        built from the extracted features and the per-instance label names
    """
    samples, sampleRate = wav2waveform(wavF)
    # recording length: number of samples divided by the sampling rate
    duration = len(samples) / sampleRate

    features = feExFun(samples)
    # request one label per feature instance over the whole recording
    labelNames = auf.annotationsFi2instances(
        annF, len(features), duration, labelsHierarchy=labelsHierarchy)
    return myML.dataXy_names(features, labelNames)
예제 #6
0
def predictSoundSections(wavF,
                         clf,
                         lt,
                         feExFun,
                         outF='default',
                         annSections='default'):
    '''
    Predict and generate the annotations of the given wav file.

    Loads the waveform and delegates to
    ``predictSectionsFromWaveform_genAnnotations``.

    Parameters:
    -----------
    wavF : str
        path to wav file
    clf : estimator
        classifier object
    lt : label transformer object
    feExFun : callable
        feature extraction
    outF : str
        annotations out file name, default = wav base name (extension
        removed) + '-predictions.txt', placed in ``outDir``
    annSections : array
        sections to print; forwarded unchanged, so the meaning of
        'default' is decided by the delegate

    Returns:
    --------
    the value returned by ``predictSectionsFromWaveform_genAnnotations``
    '''
    if outF == 'default':
        # Strip the real extension rather than replacing the text '.wav':
        # str.replace left names such as 'x.WAV' unchanged and rewrote
        # every '.wav' occurrence inside the name.
        stem = os.path.splitext(os.path.basename(wavF))[0]
        # NOTE(review): outDir is not defined in this function; it is
        # expected to exist at module scope -- confirm.
        outF = os.path.join(outDir, stem + '-predictions.txt')

    waveForm, fs = sT.wav2waveform(wavF)

    oF = predictSectionsFromWaveform_genAnnotations(waveForm,
                                                    fs,
                                                    clf,
                                                    lt,
                                                    feExFun,
                                                    outF=outF,
                                                    annSections=annSections)
    return oF
예제 #7
0
def predictAnnotationSections(wavF,
                              annF,
                              clf,
                              featExtFun,
                              lt,
                              outFile=None,
                              sep='\t',
                              printProbs=False,
                              header='',
                              readSections=None,
                              printreadSectionsC=True):
    """Predict annotations for call type sections and write them to a file.

    Each output row is tab separated: start time, end time, original
    label, predicted label.

    Parameters
    ----------
    wavF: str
        path to the wav file
    annF: str
        path to the annotation file with the sections to predict
    clf: estimator
        classifier; ``clf.predict`` is called with one flattened feature
        row per section
    featExtFun: callable
        feature extraction function applied to each waveform section
    lt: labelTransformer
        ``lt.num2nom`` converts the classifier output into label names
    outFile: str
        output file, default = annF without its extension +
        '-sectionPredictions.txt'; an existing file is replaced
    sep: str
        currently unused; rows are always tab separated
    printProbs: bool
        currently unused by this function
    header: str
        currently unused; no header is written
    readSections: list of str
        labels of the regions in annF for which we predict; by default
        every label present in annF
    printreadSectionsC: bool
        when True, sections whose label is not in readSections are also
        written, with the original label repeated as the prediction

    Returns
    -------
    outFile: str
        path of the written file

    See also
    --------
       TLpredictAnnotationSections
       TODO: recode to use TLpredictAnnotationSections
    """
    if outFile is None:
        outFile = os.path.splitext(annF)[0] + '-sectionPredictions.txt'

    # Remove a previous output before loading anything, so a failure
    # while loading does not leave a stale file behind.
    try:
        os.remove(outFile)
    except OSError:
        pass

    ## load files
    waveform, fs = sT.wav2waveform(wavF)
    T, L = annT.anns2TLndarrays(annF)
    # ``is None``: ``== None`` is evaluated elementwise when an ndarray is
    # passed, which makes the ``if`` ambiguous.
    if readSections is None:
        readSections = list(set(L))

    row = "{}\t{}\t{}\t{}\n"
    # Open the output once instead of once per row. The file is created
    # even when there is nothing to write.
    with open(outFile, 'w') as f:
        ## for each annotation section
        for i, label in enumerate(L):
            if label in readSections:
                waveformSec = auf.getWavSec(waveform, fs, *T[i])
                ## predict
                try:
                    M0 = featExtFun(waveformSec)  # extract features
                    M = np.expand_dims(M0.flatten(), axis=0)
                    predicted = lt.num2nom(clf.predict(M))[0]
                except AssertionError:
                    # an assertion failed during extraction or prediction:
                    # fall back to the original label
                    predicted = label
            elif printreadSectionsC:
                predicted = label
            else:
                continue
            ## write
            f.write(row.format(T[i, 0], T[i, 1], label, predicted))

    return outFile