def make_prediction(args):
    """Classify every .wav file under 'wavfiles' and print the result.

    For each file: downsample to args.sr, remove silence with an amplitude
    envelope mask, chop the clean signal into windows of args.dt seconds,
    run the model on the window batch, and print the class with the highest
    mean probability across windows.

    Args:
        args: namespace with model_fn, sr, dt and threshold attributes.
    """
    model = load_model(args.model_fn,
                       custom_objects={'Melspectrogram': Melspectrogram,
                                       'Normalization2D': Normalization2D})
    wav_paths = glob('{}/**'.format('wavfiles'), recursive=True)
    wav_paths = sorted([x for x in wav_paths if '.wav' in x])
    # Class names are the sub-directory names under 'wavfiles'.
    classes = sorted(os.listdir('wavfiles'))
    for wav_fn in wav_paths:
        rate, wav = downsample_mono(wav_fn, args.sr)
        mask, env = envelope(wav, rate, threshold=args.threshold)
        clean_wav = wav[mask]
        step = int(args.sr * args.dt)  # window length in samples
        batch = []
        for i in range(0, clean_wav.shape[0], step):
            sample = clean_wav[i:i + step]
            sample = sample.reshape(1, -1)
            # BUG FIX: the original tested sample.shape[0] < step, but after
            # reshape(1, -1) axis 0 is always 1, so the zero-pad branch ran
            # for every window.  Test the window length (axis 1) so only the
            # final, short window is padded.
            if sample.shape[1] < step:
                tmp = np.zeros(shape=(1, step), dtype=np.int16)
                tmp[:, :sample.shape[1]] = sample.flatten()
                sample = tmp
            batch.append(sample)
        X_batch = np.array(batch)
        y_pred = model.predict(X_batch)
        y_mean = np.mean(y_pred, axis=0)  # average probs over windows
        y_pred = np.argmax(y_mean)
        print(wav_fn, classes[y_pred])
def make_prediction(args):
    """Evaluate the model on every .wav under args.src_dir and save results.

    Saves to logs/args.pred_fn the mean class probabilities per file, and to
    logs/args.true_fn the label-encoded ground-truth classes of the files
    that produced at least one window.

    Args:
        args: namespace with model_fn, src_dir, sr, dt, threshold,
            true_fn and pred_fn attributes.
    """
    model = load_model(args.model_fn,
                       custom_objects={'STFT': STFT,
                                       'Magnitude': Magnitude,
                                       'ApplyFilterbank': ApplyFilterbank,
                                       'MagnitudeToDecibel': MagnitudeToDecibel})
    wav_paths = glob('{}/**'.format(args.src_dir), recursive=True)
    wav_paths = sorted(
        [x.replace(os.sep, '/') for x in wav_paths if '.wav' in x])
    classes = sorted(os.listdir(args.src_dir))
    # Ground-truth class name is the parent directory of each wav file.
    labels = [os.path.split(x)[0].split('/')[-1] for x in wav_paths]
    results = []
    kept_labels = []  # labels of the files that yielded at least one window
    for z, wav_fn in tqdm(enumerate(wav_paths), total=len(wav_paths)):
        rate, wav = downsample_mono(wav_fn, args.sr)
        mask, env = envelope(wav, rate, threshold=args.threshold)
        clean_wav = wav[mask]
        step = int(args.sr * args.dt)  # window length in samples
        batch = []
        for i in range(0, clean_wav.shape[0], step):
            sample = clean_wav[i:i + step]
            sample = sample.reshape(-1, 1)
            if sample.shape[0] < step:  # zero-pad the final short window
                tmp = np.zeros(shape=(step, 1), dtype=np.float32)
                tmp[:sample.shape[0], :] = sample.flatten().reshape(-1, 1)
                sample = tmp
            batch.append(sample)
        # A fully-masked (silent) file yields no windows; skip it rather
        # than calling predict on an empty batch.
        if batch:
            X_batch = np.array(batch, dtype=np.float32)
            y_pred = model.predict(X_batch)
            y_mean = np.mean(y_pred, axis=0)
            y_pred = np.argmax(y_mean)
            real_class = os.path.dirname(wav_fn).split('/')[-1]
            print('Actual class: {}, Predicted class: {}'.format(
                real_class, classes[y_pred]))
            results.append(y_mean)
            kept_labels.append(labels[z])
    # Encode the kept labels once, after the loop.  The original refit a
    # LabelEncoder on every iteration (only the final fit was saved) and
    # also computed an unused encoding of all labels up front.
    y_true = LabelEncoder().fit_transform(kept_labels)
    np.save(os.path.join('logs', args.true_fn), np.array(y_true))
    np.save(os.path.join('logs', args.pred_fn), np.array(results))
def make_prediction(args):
    """Predict class probabilities for every .wav file under 'wavfiles'.

    Args:
        args: namespace with model_fn, sr, dt and threshold attributes.

    Returns:
        np.ndarray of shape (n_files_processed, n_classes): the mean class
        probabilities over each file's windows.  Files that fail to
        downsample are skipped (best-effort).
    """
    model = load_model(args.model_fn,
                       custom_objects={'Melspectrogram': Melspectrogram,
                                       'Normalization2D': Normalization2D})
    wav_paths = glob('{}/**'.format('wavfiles'), recursive=True)
    wav_paths = sorted(
        [x.replace(os.sep, '/') for x in wav_paths if '.wav' in x])
    results = []
    for z, wav_fn in tqdm(enumerate(wav_paths), total=len(wav_paths)):
        try:
            # BUG FIX: the original passed the undefined name `src_fn`
            # here, raising NameError on every file; the bare `except`
            # silently skipped them all, so the function always returned
            # an empty array.  Use the loop variable `wav_fn`.
            rate, wav = downsample_mono(wav_fn, args.sr)
        except Exception:  # deliberate best-effort: skip unreadable files
            print('Error with downsampling')
            continue
        mask, env = envelope(wav, rate, threshold=args.threshold)
        clean_wav = wav[mask]
        step = int(args.sr * args.dt)  # window length in samples
        batch = []
        for i in range(0, clean_wav.shape[0], step):
            sample = clean_wav[i:i + step]
            sample = sample.reshape(1, -1)
            # BUG FIX: test the window length (axis 1); after
            # reshape(1, -1) axis 0 is always 1, so the original padded
            # every window unconditionally.
            if sample.shape[1] < step:
                tmp = np.zeros(shape=(1, step), dtype=np.int16)
                tmp[:, :sample.shape[1]] = sample.flatten()
                sample = tmp
            batch.append(sample)
        X_batch = np.array(batch)
        y_pred = model.predict(X_batch)
        y_mean = np.mean(y_pred, axis=0)  # average probs over windows
        results.append(y_mean)
    return np.array(results)
def make_prediction(args):
    """Classify each .wav under args.src_dir, print actual vs. predicted
    class per file, and save the mean class probabilities to
    logs/args.pred_fn.

    Args:
        args: namespace with model_fn, src_dir, sr, dt, threshold and
            pred_fn attributes.
    """
    # Which feature front end should be used?  (Melspectrogram here.)
    model = load_model(args.model_fn,
                       custom_objects={
                           'Melspectrogram': kapre.time_frequency.Melspectrogram,
                           'Normalization2D': Normalization2D})
    wav_paths = glob('{}/**'.format(args.src_dir), recursive=True)
    wav_paths = sorted(
        [x.replace(os.sep, '/') for x in wav_paths if '.wav' in x])
    classes = sorted(os.listdir(args.src_dir))
    results = []
    for z, wav_fn in tqdm(enumerate(wav_paths), total=len(wav_paths)):
        rate, wav = downsample_mono(wav_fn, args.sr)
        mask, env = envelope(wav, rate, threshold=args.threshold)
        clean_wav = wav[mask]
        step = int(args.sr * args.dt)  # window length in samples
        batch = []
        for i in range(0, clean_wav.shape[0], step):
            sample = clean_wav[i:i + step]
            sample = sample.reshape(1, -1)
            # BUG FIX: the original tested sample.shape[0] < step, but after
            # reshape(1, -1) axis 0 is always 1, so the zero-pad branch ran
            # for every window.  Test the window length (axis 1) so only the
            # final, short window is padded.
            if sample.shape[1] < step:
                tmp = np.zeros(shape=(1, step), dtype=np.int16)
                tmp[:, :sample.shape[1]] = sample.flatten()
                sample = tmp
            batch.append(sample)
        X_batch = np.array(batch)
        y_pred = model.predict(X_batch)
        y_mean = np.mean(y_pred, axis=0)  # average probs over windows
        y_pred = np.argmax(y_mean)
        real_class = os.path.dirname(wav_fn).split('/')[-1]
        print('Actual class: {}, Predicted class: {}'.format(
            real_class, classes[y_pred]))
        results.append(y_mean)
    np.save(os.path.join('logs', args.pred_fn), np.array(results))
def make_prediction(model, args, address):
    """Classify the single audio file at `address` with `model`.

    Args:
        model: loaded Keras model; predict() is called on a window batch.
        args: namespace with sr, dt and threshold attributes.
        address: path to the wav file to classify.

    Returns:
        dict mapping each class name in CLASSES to its mean predicted
        probability formatted to 4 decimal places, e.g.
        {crime: "0.9123", covid: "0.0001", ...}.  All values stay 0 if the
        envelope mask removes the entire signal.
    """
    ret_dict = {k: 0 for k in CLASSES.values()}
    rate, wav = downsample_mono(address, args.sr)
    mask, env = envelope(wav, rate, threshold=args.threshold)
    clean_wav = wav[mask]
    step = int(args.sr * args.dt)  # window length in samples
    batch = []
    for i in range(0, clean_wav.shape[0], step):
        sample = clean_wav[i:i + step]
        sample = sample.reshape(-1, 1)
        if sample.shape[0] < step:  # zero-pad the final short window
            tmp = np.zeros(shape=(step, 1), dtype=np.float32)
            tmp[:sample.shape[0], :] = sample.flatten().reshape(-1, 1)
            sample = tmp
        batch.append(sample)
    # ROBUSTNESS FIX: a fully-masked (silent) file yields no windows and the
    # original crashed inside model.predict on an empty batch.  Return the
    # all-zero dict instead.
    if not batch:
        return ret_dict
    X_batch = np.array(batch, dtype=np.float32)
    y_pred = model.predict(X_batch)
    y_mean = np.mean(y_pred, axis=0)  # average probs over windows
    for i in range(y_mean.shape[0]):
        ret_dict[CLASSES[i]] = f"{y_mean[i]:.4f}"
    return ret_dict
# Script: convert an mp3 to wav, run the trained language-ID model on it,
# and average per-window class probabilities.  Relies on externally defined
# names: AudioSegment (pydub), load_model + kapre layers, st (streamlit),
# downsample_mono, envelope, np.

# Convert the bundled mp3 to wav so the wav loader can read it.
sound = AudioSegment.from_mp3('./aud.mp3')
sound.export('./audio.wav', format='wav')
audio = './audio.wav'

# Load the trained model; the kapre audio layers must be registered as
# custom objects for deserialization.
model = load_model('lstm.h5',
                   custom_objects={'STFT': STFT,
                                   'Magnitude': Magnitude,
                                   'ApplyFilterbank': ApplyFilterbank,
                                   'MagnitudeToDecibel': MagnitudeToDecibel})
classes = ['English', 'Hindi', 'Mandarin']
# NOTE(review): `audio` is a non-empty string, so this guard is always
# true as written — presumably kept from a version where `audio` could be
# an upload object; confirm intent.
if audio:
    st.info('Analyzing audio file')  # streamlit status message
    # Downsample to 16 kHz mono, then mask out low-amplitude regions
    # (threshold=10) before windowing.
    rate, wav = downsample_mono(audio, 16000)
    mask, env = envelope(wav, rate, threshold=10)
    clean_wav = wav[mask]
    # Window length in samples — presumably 10-second windows at 16 kHz
    # (sr * dt); TODO confirm against the model's expected input length.
    step = int(16000 * 10)
    batch = []
    for i in range(0, clean_wav.shape[0], step):
        sample = clean_wav[i:i + step]
        sample = sample.reshape(-1, 1)
        if sample.shape[0] < step:
            # Zero-pad the final window up to the fixed length.
            tmp = np.zeros(shape=(step, 1), dtype=np.float32)
            tmp[:sample.shape[0], :] = sample.flatten().reshape(-1, 1)
            sample = tmp
        batch.append(sample)
    X_batch = np.array(batch, dtype=np.float32)
    # Per-window class probabilities, averaged across windows.
    y_pred = model.predict(X_batch)
    y_mean = np.mean(y_pred, axis=0)