def create_data(sig, label_1, label_2):
    """Build the feature array and a two-label target matrix for one signal.

    ``sig`` is handed to ``get_mfcc`` together with the analysis settings
    read from the enclosing scope. The target matrix has one row per entry
    of the returned features and ``N_classes_1 + N_classes_2`` columns; in
    every row the columns ``label_dic[label_1]`` and ``label_dic[label_2]``
    are set to 1 and all others stay 0.

    Returns:
        The tuple ``(features, targets)``.
    """
    mfcc_options = dict(
        winstep=window_step,
        winlen=window_size,
        nfft=2048,
        lowfreq=lowfreq,
        highfreq=highfreq,
        numcep=size,
        nfilt=size + 2,
    )
    features = get_mfcc(sig, freq, **mfcc_options)

    n_rows = len(features)
    n_columns = N_classes_1 + N_classes_2
    targets = np.zeros((n_rows, n_columns))
    # Both labels are switched on in every row.
    for name in (label_1, label_2):
        targets[:, label_dic[name]] = 1
    return features, targets
Exemple #2
0
def create_data(sig, label):
    """Return the features of ``sig`` and a one-hot target for ``label``.

    The features come from ``get_mfcc`` called with the analysis settings
    found in the enclosing scope. The target has one row per entry of the
    features and ``N_classes`` columns; column ``label_dic[label]`` is 1 in
    every row, all other columns are 0.

    Returns:
        The tuple ``(features, one_hot)``.
    """
    features = get_mfcc(
        sig, freq,
        winstep=window_step, winlen=window_size, nfft=2048,
        lowfreq=lowfreq, highfreq=highfreq,
        numcep=size, nfilt=size + 2,
    )

    target_shape = (len(features), N_classes)
    one_hot = np.zeros(target_shape)
    class_column = label_dic[label]
    one_hot[:, class_column] = 1
    return features, one_hot
Exemple #3
0
def create_data(sig, label):
    """Return features and a one-hot target, and record the segment duration.

    The features come from ``get_mfcc`` called with the analysis settings
    found in the enclosing scope. The target has one row per entry of the
    features and ``N_classes`` columns, with column ``label_dic[label]``
    set to 1 in every row.

    Side effect: ``(stop - start) / 10**7`` is appended to
    ``time_per_occurrence_class[label_dic[label]]``. ``start`` and ``stop``
    are not parameters; they are read from the enclosing scope.

    Returns:
        The tuple ``(features, one_hot)``.
    """
    extractor_settings = {
        "winstep": window_step,
        "winlen": window_size,
        "nfft": 2048,
        "lowfreq": lowfreq,
        "highfreq": highfreq,
        "numcep": size,
        "nfilt": size + 2,
    }
    features = get_mfcc(sig, freq, **extractor_settings)

    one_hot = np.zeros((len(features), N_classes))
    one_hot[:, label_dic[label]] = 1

    # Presumably start/stop are in 100 ns units, making this seconds -- TODO confirm.
    per_class_durations = time_per_occurrence_class[label_dic[label]]
    duration = (stop - start) / (10.0**7)
    per_class_durations.append(duration)
    return features, one_hot
def create_data(sig, label):
    """Compute the features of ``sig`` and the matching one-hot target.

    ``get_mfcc`` is called with the analysis settings found in the
    enclosing scope. Every entry of the resulting features gets one target
    row of width ``N_classes`` in which only column ``label_dic[label]``
    is 1.

    Returns:
        The tuple ``(features, targets)``.
    """
    analysis_kwargs = dict(winstep=window_step, winlen=window_size, nfft=2048)
    analysis_kwargs.update(lowfreq=lowfreq, highfreq=highfreq)
    analysis_kwargs.update(numcep=size, nfilt=size + 2)
    features = get_mfcc(sig, freq, **analysis_kwargs)

    n_rows = len(features)
    targets = np.zeros((n_rows, N_classes))
    hot_column = label_dic[label]
    targets[:, hot_column] = 1
    return features, targets
Exemple #5
0
def create_data(sig, network_type):
    """Extract the input features that match the requested network type.

    Args:
        sig: Signal handed to the feature extractor.
        network_type: ``"DNN"`` selects ``get_mfcc``; ``"CNN"`` selects
            ``MFSC``.

    Returns:
        The feature array produced by the selected extractor.

    Raises:
        ValueError: If ``network_type`` is neither ``"DNN"`` nor ``"CNN"``.
    """
    if network_type == "DNN":
        # Features are computed from the ``sig`` argument, not from a
        # ``signal`` variable that happens to exist in an outer scope.
        return get_mfcc(sig,
                        freq,
                        winstep=window_step,
                        winlen=window_size,
                        nfft=2048,
                        lowfreq=lowfreq,
                        highfreq=highfreq,
                        numcep=size,
                        nfilt=size + 2)
    if network_type == "CNN":
        # MFSC returns a pair; only the first element is used here.
        # NOTE(review): this branch hard-codes lowfreq=100 and passes no
        # winlen, unlike the DNN branch -- confirm this is intentional.
        features, _ = MFSC(sig,
                           freq,
                           winstep=window_step,
                           nfft=2048,
                           lowfreq=100,
                           highfreq=highfreq,
                           nfilt=size)
        return features
    # Fail with a clear message instead of an unbound-local error on return.
    raise ValueError(
        "unknown network_type %r (expected 'DNN' or 'CNN')" % (network_type,))
def create_data(sig, label):
    """Return features and a directly encoded target, and record the duration.

    The features come from ``get_mfcc`` called with the analysis settings
    found in the enclosing scope. The target is a 1-D array with one value
    per entry of the features, each equal to ``label_dic[label]``.

    Side effect: ``(stop - start) / 10**7`` is appended to
    ``time_per_occurrence_class[label_dic[label]]``. ``start`` and ``stop``
    are not parameters; they are read from the enclosing scope.

    Returns:
        The tuple ``(features, targets)``.
    """
    features = get_mfcc(
        sig,
        freq,
        winstep=window_step,
        winlen=window_size,
        nfft=2048,
        lowfreq=lowfreq,
        highfreq=highfreq,
        numcep=size,
        nfilt=size + 2,
    )
    n_rows = len(features)
    targets = label_dic[label] * np.ones(n_rows)

    # Presumably start/stop are in 100 ns units, making this seconds -- TODO confirm.
    per_class_durations = time_per_occurrence_class[label_dic[label]]
    per_class_durations.append((stop - start) / (10.0 ** 7))
    return features, targets
 # Process each annotation line: it is split on whitespace and its first
 # three fields are read as start, stop and label.
 # NOTE(review): this excerpt is truncated -- the `except` clause of the
 # `try` below and the rest of the loop body are not visible here.
 for j in xrange(len(lines)):
     try:
         cur_line = lines[j].split()
         start = float(cur_line[0])
         stop = float(cur_line[1])
         label = cur_line[2]
         # Segment length: used as-is when the label file name contains
         # "WS", otherwise divided by 1e7 (presumably 100 ns units converted
         # to seconds -- TODO confirm).
         if "WS" in lab_name:
             length = stop - start
         else:
             length = (stop - start) / 10.0 ** 7
         # The read is done for every line, whether or not the label is kept
         # (presumably so the read position in `f` stays aligned -- confirm).
         audio = f.read_frames(freq * length)
         if label in label_dic:
             # NOTE(review): `time` is only assigned further down in this
             # excerpt; presumably it is initialised before the loop -- confirm.
             if time < threshold:
                 # energy = np.sum(audio ** 2, 0) / len(audio)
                 signal = audio  # audio/math.sqrt(energy)
                 mfcc = get_mfcc(signal, freq, winstep=window_step, winlen=window_size, nfft=2048, lowfreq=lowfreq,
                                 highfreq=highfreq, numcep=size, nfilt=size + 2)
                 # compute_delta is compared against the string "True", not a bool.
                 if compute_delta == "True":
                     # d1_mfcc[k] = mfcc[k+1] - mfcc[k]; the first row of mfcc
                     # is then dropped so both arrays have the same row count.
                     d1_mfcc = np.zeros((mfcc.shape[0]-1,mfcc.shape[1]))
                     for k in range(mfcc.shape[0]-1):
                         d1_mfcc[k,:] = mfcc[k+1,:] - mfcc[k,:]
                     mfcc = mfcc[1:,:]
                 # Number of context-window positions iterated below.
                 N_iter = np.floor((len(mfcc) - N) / slide)
                 # apply context window
                 if (length/window_step) > N:
                     # Record this segment's length and refresh the running
                     # total for its class.
                     time_per_occurrence_class[label_dic[label]].append(length)
                     time = np.sum(time_per_occurrence_class[label_dic[label]])
                     mfcc_matrix = np.zeros((1, size * N))
                     for k in range(int(N_iter)):
                         # Concatenate N consecutive rows of mfcc, starting at
                         # row k * slide, into one flat vector.
                         mfcc_vec = []
                         for kk in range(N):
                             mfcc_vec = np.concatenate((mfcc_vec, mfcc[k * slide + kk, :]))
Exemple #8
0
 # NOTE(review): excerpt of a larger loop body -- `start`, `stop`, `label`,
 # `f` and `lab_name` are set outside the visible lines, and the code is
 # cut off after the last line shown.
 # Segment length: used as-is when the label file name contains "WS",
 # otherwise divided by 1e7 (presumably 100 ns units converted to seconds
 # -- TODO confirm).
 if "WS" in lab_name:
     length = stop - start
 else:
     length = (stop - start) / 10.0**7
 # The read is done whether or not the label is kept (presumably so the read
 # position in `f` stays aligned -- confirm).
 audio = f.read_frames(freq * length)
 if label in label_dic:
     # The segment length is recorded, and the per-class total refreshed,
     # before the threshold test, so the test already includes this segment.
     time_per_occurrence_class[label_dic[label]].append(length)
     time = np.sum(time_per_occurrence_class[label_dic[label]])
     if time < threshold:
         # energy = np.sum(audio ** 2, 0) / len(audio)
         signal = audio  # audio/math.sqrt(energy)
         mfcc = get_mfcc(signal,
                         freq,
                         winstep=window_step,
                         winlen=window_size,
                         nfft=2048,
                         lowfreq=lowfreq,
                         highfreq=highfreq,
                         numcep=size,
                         nfilt=size + 2)
         # compute_delta is compared against the string "True", not a bool.
         if compute_delta == "True":
             # d1_mfcc[k] = mfcc[k+1] - mfcc[k]; the first row of mfcc is
             # then dropped so both arrays have the same row count.
             d1_mfcc = np.zeros(
                 (mfcc.shape[0] - 1, mfcc.shape[1]))
             for k in range(mfcc.shape[0] - 1):
                 d1_mfcc[k, :] = mfcc[k + 1, :] - mfcc[k, :]
             mfcc = mfcc[1:, :]
         # Presumably the number of context-window positions of N rows when
         # stepping by `slide`; its use is outside the visible lines.
         N_iter = np.floor((len(mfcc) - N) / slide)
         # apply context window
         if (length / window_step) > N:
             # One-row buffers of width size * N for the stacked features and
             # their deltas.
             mfcc_matrix = np.zeros((1, size * N))
             d1_matrix = np.zeros((1, size * N))