# Example #1
def make_prediction(args):
    """Predict a class for every .wav file under 'wavfiles'.

    Splits each (envelope-masked) signal into fixed-length windows of
    ``args.sr * args.dt`` samples, averages the model's per-window
    probabilities, and prints the winning class name per file.

    Parameters expected on ``args``: model_fn, sr, dt, threshold.
    """
    model = load_model(args.model_fn,
                       custom_objects={
                           'Melspectrogram': Melspectrogram,
                           'Normalization2D': Normalization2D
                       })

    wav_paths = glob('{}/**'.format('wavfiles'), recursive=True)
    wav_paths = sorted([x for x in wav_paths if '.wav' in x])
    # Class names are the sub-directory names under 'wavfiles', sorted to
    # match the label order the model was trained with.
    classes = sorted(os.listdir('wavfiles'))

    for wav_fn in wav_paths:
        rate, wav = downsample_mono(wav_fn, args.sr)
        mask, env = envelope(wav, rate, threshold=args.threshold)
        clean_wav = wav[mask]
        step = int(args.sr * args.dt)
        batch = []

        for i in range(0, clean_wav.shape[0], step):
            sample = clean_wav[i:i + step].reshape(1, -1)
            # BUG FIX: original tested sample.shape[0], which is always 1
            # after reshape(1, -1), so the zero-pad branch ran for every
            # window. The time axis is shape[1]; only pad short tails.
            if sample.shape[1] < step:
                tmp = np.zeros(shape=(1, step), dtype=np.int16)
                tmp[:, :sample.shape[1]] = sample.flatten()
                sample = tmp
            batch.append(sample)

        # A file whose signal is entirely masked out produces no windows;
        # model.predict would fail on an empty array, so skip it.
        if not batch:
            continue
        X_batch = np.array(batch)
        y_pred = model.predict(X_batch)
        y_mean = np.mean(y_pred, axis=0)
        print(wav_fn, classes[np.argmax(y_mean)])
# Example #2
def make_prediction(args):
    """Score every .wav under ``args.src_dir`` and save results to 'logs'.

    For each file: envelope-mask the signal, window it into
    ``args.sr * args.dt``-sample chunks, average the model's per-window
    probabilities, print actual vs. predicted class, and collect the mean
    probability vector. Finally saves label-encoded true labels to
    ``logs/args.true_fn`` and the probability vectors to ``logs/args.pred_fn``.

    Parameters expected on ``args``: model_fn, src_dir, sr, dt, threshold,
    true_fn, pred_fn.
    """
    model = load_model(args.model_fn,
                       custom_objects={
                           'STFT': STFT,
                           'Magnitude': Magnitude,
                           'ApplyFilterbank': ApplyFilterbank,
                           'MagnitudeToDecibel': MagnitudeToDecibel
                       })
    wav_paths = glob('{}/**'.format(args.src_dir), recursive=True)
    wav_paths = sorted(
        [x.replace(os.sep, '/') for x in wav_paths if '.wav' in x])
    classes = sorted(os.listdir(args.src_dir))
    # The parent-directory name of each wav is its ground-truth label.
    labels = [os.path.split(x)[0].split('/')[-1] for x in wav_paths]
    results = []
    ylabelsResults = []

    for z, wav_fn in tqdm(enumerate(wav_paths), total=len(wav_paths)):
        rate, wav = downsample_mono(wav_fn, args.sr)
        mask, env = envelope(wav, rate, threshold=args.threshold)
        clean_wav = wav[mask]
        step = int(args.sr * args.dt)
        batch = []

        for i in range(0, clean_wav.shape[0], step):
            sample = clean_wav[i:i + step].reshape(-1, 1)
            if sample.shape[0] < step:
                # Zero-pad the trailing partial window to a full step.
                tmp = np.zeros(shape=(step, 1), dtype=np.float32)
                tmp[:sample.shape[0], :] = sample.flatten().reshape(-1, 1)
                sample = tmp
            batch.append(sample)

        # Skip files whose signal was entirely masked out by the envelope
        # (idiomatic truthiness check instead of `batch != []`).
        if batch:
            X_batch = np.array(batch, dtype=np.float32)
            y_pred = model.predict(X_batch)
            y_mean = np.mean(y_pred, axis=0)
            y_pred = np.argmax(y_mean)
            real_class = os.path.dirname(wav_fn).split('/')[-1]
            print('Actual class: {}, Predicted class: {}'.format(
                real_class, classes[y_pred]))
            results.append(y_mean)
            ylabelsResults.append(labels[z])

    # Encode only the labels of files that actually produced predictions,
    # keeping y_true and y_pred arrays aligned.
    labenc = LabelEncoder()
    ytrueLab = labenc.fit_transform(ylabelsResults)
    np.save(os.path.join('logs', args.true_fn), np.array(ytrueLab))
    np.save(os.path.join('logs', args.pred_fn), np.array(results))
def make_prediction(args):
    """Predict class probabilities for every .wav under 'wavfiles'.

    Returns an array of per-file mean probability vectors (files that fail
    to downsample or have no signal after envelope masking are skipped).

    Parameters expected on ``args``: model_fn, sr, dt, threshold.
    """
    model = load_model(args.model_fn,
                       custom_objects={
                           'Melspectrogram': Melspectrogram,
                           'Normalization2D': Normalization2D
                       })

    wav_paths = glob('{}/**'.format('wavfiles'), recursive=True)
    wav_paths = sorted(
        [x.replace(os.sep, '/') for x in wav_paths if '.wav' in x])
    classes = sorted(os.listdir('wavfiles'))
    # Parent-directory name of each wav is its label.
    labels = [os.path.split(x)[0].split('/')[-1] for x in wav_paths]
    results = []

    for z, wav_fn in tqdm(enumerate(wav_paths), total=len(wav_paths)):
        try:
            # BUG FIX: original passed the undefined name `src_fn` here,
            # which raised NameError on every iteration.
            rate, wav = downsample_mono(wav_fn, args.sr)
        except Exception:  # narrowed from a bare `except:`
            print('Error with downsampling')
            continue
        mask, env = envelope(wav, rate, threshold=args.threshold)
        clean_wav = wav[mask]
        step = int(args.sr * args.dt)
        batch = []

        for i in range(0, clean_wav.shape[0], step):
            sample = clean_wav[i:i + step].reshape(1, -1)
            # BUG FIX: original tested sample.shape[0] (always 1 after the
            # reshape); the time axis is shape[1].
            if sample.shape[1] < step:
                tmp = np.zeros(shape=(1, step), dtype=np.int16)
                tmp[:, :sample.shape[1]] = sample.flatten()
                sample = tmp
            batch.append(sample)

        # No windows (fully-masked signal) — model.predict would fail.
        if not batch:
            continue
        X_batch = np.array(batch)
        y_pred = model.predict(X_batch)
        y_mean = np.mean(y_pred, axis=0)
        results.append(y_mean)

    return np.array(results)
def make_prediction(args):
    """Score every .wav under ``args.src_dir`` and save mean probabilities.

    Prints actual vs. predicted class per file and writes the collected
    per-file mean probability vectors to ``logs/args.pred_fn``.

    Parameters expected on ``args``: model_fn, src_dir, sr, dt, threshold,
    pred_fn.
    """
    # Which feature should be used? (translated from the original German
    # comment: "Welches feature soll benutzt werden?")
    model = load_model(args.model_fn,
                       custom_objects={
                           'Melspectrogram':
                           kapre.time_frequency.Melspectrogram,
                           'Normalization2D': Normalization2D
                       })

    wav_paths = glob('{}/**'.format(args.src_dir), recursive=True)
    wav_paths = sorted(
        [x.replace(os.sep, '/') for x in wav_paths if '.wav' in x])
    classes = sorted(os.listdir(args.src_dir))
    results = []

    for wav_fn in tqdm(wav_paths, total=len(wav_paths)):
        rate, wav = downsample_mono(wav_fn, args.sr)
        mask, env = envelope(wav, rate, threshold=args.threshold)
        clean_wav = wav[mask]
        step = int(args.sr * args.dt)
        batch = []

        for i in range(0, clean_wav.shape[0], step):
            sample = clean_wav[i:i + step].reshape(1, -1)
            # BUG FIX: original tested sample.shape[0] (always 1 after the
            # reshape); the time axis is shape[1].
            if sample.shape[1] < step:
                tmp = np.zeros(shape=(1, step), dtype=np.int16)
                tmp[:, :sample.shape[1]] = sample.flatten()
                sample = tmp
            batch.append(sample)

        # No windows (fully-masked signal) — model.predict would fail.
        if not batch:
            continue
        X_batch = np.array(batch)
        y_pred = model.predict(X_batch)
        y_mean = np.mean(y_pred, axis=0)
        y_pred = np.argmax(y_mean)
        real_class = os.path.dirname(wav_fn).split('/')[-1]
        print('Actual class: {}, Predicted class: {}'.format(
            real_class, classes[y_pred]))
        results.append(y_mean)

    np.save(os.path.join('logs', args.pred_fn), np.array(results))
# Example #5
def make_prediction(model, args, address):
    """Score the single wav file at *address* with *model*.

    Returns a dict mapping each class name in ``CLASSES`` to its mean
    predicted probability formatted as a 4-decimal string (or 0 for every
    class when the file yields no usable signal).

    Parameters expected on ``args``: sr, dt, threshold.
    """
    # All classes default to 0 so the returned dict always has full keys.
    ret_dict = {k: 0 for k in CLASSES.values()}

    rate, wav = downsample_mono(address, args.sr)
    mask, env = envelope(wav, rate, threshold=args.threshold)
    clean_wav = wav[mask]
    step = int(args.sr * args.dt)
    batch = []

    for i in range(0, clean_wav.shape[0], step):
        sample = clean_wav[i:i + step].reshape(-1, 1)
        if sample.shape[0] < step:
            # Zero-pad the trailing partial window to a full step.
            tmp = np.zeros(shape=(step, 1), dtype=np.float32)
            tmp[:sample.shape[0], :] = sample.flatten().reshape(-1, 1)
            sample = tmp
        batch.append(sample)

    # BUG FIX: original called model.predict on an empty batch when the
    # envelope masked out the whole signal; return the zeroed dict instead.
    if not batch:
        return ret_dict

    X_batch = np.array(batch, dtype=np.float32)
    y_pred = model.predict(X_batch)
    y_mean = np.mean(y_pred, axis=0)
    for i in range(y_mean.shape[0]):
        ret_dict[CLASSES[i]] = f"{y_mean[i]:.4f}"
    return ret_dict
        sound = AudioSegment.from_mp3('./aud.mp3')
        sound.export('./audio.wav', format='wav')
        audio = './audio.wav'
# Script body: load an LSTM language-ID model and classify one audio file.
model = load_model('lstm.h5',
                   custom_objects={
                       'STFT': STFT,
                       'Magnitude': Magnitude,
                       'ApplyFilterbank': ApplyFilterbank,
                       'MagnitudeToDecibel': MagnitudeToDecibel
                   })

classes = ['English', 'Hindi', 'Mandarin']

# NOTE(review): `audio` (path to a wav file) is assigned outside this view —
# confirm it is set before this point.
if audio:
    st.info('Analyzing audio file')
    # Downsample to 16 kHz mono, then keep only samples above the envelope
    # threshold.
    rate, wav = downsample_mono(audio, 16000)
    mask, env = envelope(wav, rate, threshold=10)
    clean_wav = wav[mask]
    # Window size: 10 seconds at 16 kHz.
    step = int(16000 * 10)
    batch = []
    for i in range(0, clean_wav.shape[0], step):
        sample = clean_wav[i:i + step]
        sample = sample.reshape(-1, 1)
        if sample.shape[0] < step:
            # Zero-pad the final partial window to a full step.
            tmp = np.zeros(shape=(step, 1), dtype=np.float32)
            tmp[:sample.shape[0], :] = sample.flatten().reshape(-1, 1)
            sample = tmp
        batch.append(sample)
    X_batch = np.array(batch, dtype=np.float32)
    y_pred = model.predict(X_batch)
    # Average the per-window class probabilities; consumers of y_mean
    # (e.g. the displayed prediction) are past the end of this view.
    y_mean = np.mean(y_pred, axis=0)