def extract_features_from_audio(self, path):
    """Run YAMNet over every file in ``path`` and collect its per-frame outputs.

    A YAMNet frames model is built once, loaded with the weights at
    ``self.yamnet_weights`` and applied to each directory entry. Files that
    cannot be loaded or scored are logged (with traceback) and skipped.

    Args:
        path: Directory whose entries are all treated as audio files.

    Returns:
        Tuple ``(samples, features)`` of NumPy arrays. ``samples[i]`` is the
        extension-less name of the file that produced embedding
        ``features[i]``.

    Side effects:
        Stores the results on ``self.samples``, ``self.features`` and
        ``self.classes`` (the per-frame score rows returned by the model).
    """
    samples, features, classes = [], [], []

    model = yamnet_frames_model(Params())
    model.load_weights(self.yamnet_weights)

    for sound in tqdm(os.listdir(path)):
        basename = os.path.splitext(sound)[0]
        try:
            # librosa resamples to the requested 16 kHz rate on load.
            wav = librosa.load(os.path.join(path, sound), sr=16000)[0].astype(np.float32)
            scores, embeddings, _ = model(wav)
        except Exception:
            # Best effort: one unreadable file must not abort the whole run,
            # but Ctrl-C / SystemExit are no longer swallowed.
            logging.exception('Unable to process file: %s', sound)
            continue

        # One sample label per embedding row keeps samples/features aligned.
        for feature in embeddings:
            samples.append(basename)
            features.append(feature)
        classes.extend(scores)

    self.samples = np.asarray(samples)
    self.features = np.asarray(features)
    self.classes = np.asarray(classes)
    return self.samples, self.features
def load_model(config):
    """Build the YAMNet frames model and return it with its class names.

    Args:
        config: Passed unchanged to ``load_weights`` to obtain the weights
            that are loaded into the model.

    Returns:
        Tuple ``(model, class_names, params)``: the weight-loaded model, the
        names read from the class-map CSV, and the ``Params`` instance the
        model was built with.
    """
    logger.debug("Loading model...")
    weights_file = load_weights(config)

    model_params = yamnet_params.Params()
    model = yamnet_model.yamnet_frames_model(model_params)
    model.load_weights(weights_file)

    # The class map is looked up in the "yamnet" directory next to this module.
    module_dir = os.path.dirname(__file__)
    class_map_csv = os.path.join(module_dir, "yamnet", "yamnet_class_map.csv")
    labels = yamnet_model.class_names(class_map_csv)

    return model, labels, model_params
def infer(frm_rcv):
    """Score audio received from ``frm_rcv`` with YAMNet and publish the results.

    Each message received is unpacked as ``(aud_time, audio)``. Audio with more
    than one dimension is averaged along axis 1 before prediction. For the last
    score row of every prediction, a JSON document with the keys ``time``,
    ``inferences``, ``mel``, ``embeddings`` and ``idxs`` is published to the
    ``inference`` fanout exchange of the AMQP broker on ``localhost``.

    The loop ends when ``frm_rcv.recv()`` raises ``EOFError``; any other
    per-message failure is logged and the loop continues. The process exits
    with status 1 if the model cannot be constructed.

    Args:
        frm_rcv: Object whose ``recv()`` returns ``(aud_time, audio)`` pairs.
            NOTE(review): presumably the receiving end of a
            ``multiprocessing`` pipe - confirm against the caller.
    """
    logger.info('infering ')
    # Imported lazily, as in the original, so they load only where infer() runs.
    from yamnet import yamnet as yamnet_model
    from yamnet import params
    import json

    top_k = 521  # report the top k classes

    connection = pika.BlockingConnection(
        pika.ConnectionParameters('localhost'))
    try:
        channel = connection.channel()
        channel.exchange_declare(exchange='inference', exchange_type='fanout')

        logger.info('model ')
        try:
            yamnet = yamnet_model.yamnet_frames_model(params.Params())
        except Exception:
            logger.exception('UGGGG')
            sys.exit(1)
        yamnet.load_weights('/opt/soundscene/yamnet.h5')
        logger.info('done model ')

        while True:
            try:
                aud_time, normalized_audio_1hz = frm_rcv.recv()
                if len(normalized_audio_1hz.shape) > 1:
                    normalized_audio_1hz = np.mean(normalized_audio_1hz, axis=1)

                # returns [1,classes] classes=521
                scores, emb, mel = yamnet.predict(normalized_audio_1hz, steps=1)

                # Only the most recent score row is published.
                for _n in scores[-1:]:
                    top_idxs = np.argsort(_n)[::-1][:top_k]
                    inferences = _n[top_idxs]
                    channel.basic_publish(
                        exchange='inference',
                        routing_key='',
                        body=json.dumps(
                            dict(
                                time=aud_time,
                                inferences=inferences.tolist(),
                                mel=mel.tolist(),
                                embeddings=[],  # no embeddings produced for yamnet
                                idxs=top_idxs.tolist())))
            except EOFError:
                # The sender is gone; without this the loop would spin forever
                # logging the same EOFError on every iteration.
                logger.info('audio source closed, stopping inference')
                break
            except Exception as e:
                logger.exception(e)
    finally:
        # Release the broker connection on every exit path.
        if connection.is_open:
            connection.close()
import argparse
import imutils
import time
import dlib
import cv2
import sys

# sound packages
import pyaudio
import librosa
import numpy as np
import matplotlib.pyplot as plt
import keras
import yamnet.params as params
import yamnet.yamnet as yamnet_model

# Build the YAMNet frames model at import time and load its weights and class
# names from paths relative to the current working directory.
# NOTE(review): the `params` *module* itself is passed as the model
# configuration (and `params.SAMPLE_RATE` is read as a module attribute
# below) - confirm the vendored yamnet package expects a module here rather
# than a `Params()` instance.
yamnet = yamnet_model.yamnet_frames_model(params)
yamnet.load_weights('yamnet/yamnet.h5')
yamnet_classes = yamnet_model.class_names('yamnet/yamnet_class_map.csv')

# Haar cascade classifiers, loaded from XML files in the working directory.
# multiple cascades: https://github.com/Itseez/opencv/tree/master/data/haarcascades
#https://github.com/Itseez/opencv/blob/master/data/haarcascades/haarcascade_frontalface_default.xml
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
#https://github.com/Itseez/opencv/blob/master/data/haarcascades/haarcascade_eye.xml
eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')

# Video capture on device index 0.
cap = cv2.VideoCapture(0)

# Number of audio samples in one analysis frame: one second at the YAMNet
# sample rate.
frame_len = int(params.SAMPLE_RATE * 1) # 1sec

# PyAudio handle; no stream is opened in the portion of the script shown here.
p = pyaudio.PyAudio()
def _remove_stale_wavs(folder):
    """Best-effort removal of the ``*.wav`` files directly inside ``folder``."""
    for entry in list(os.scandir(folder)):
        # Like the shell glob this replaces, leave dotfiles alone.
        if entry.name.startswith(".") or not entry.name.endswith(".wav"):
            continue
        if not entry.is_file():
            continue
        try:
            os.remove(entry.path)
        except OSError as err:
            # A failed delete must not abort the run (the old `rm` call was
            # best-effort too), but it is no longer silent.
            print(f"could not remove {entry.path}: {err}")


def _split_channels(wav_path, channel_paths):
    """Write channel ``k`` of ``wav_path`` to ``channel_paths[k]`` with ffmpeg."""
    command = ["ffmpeg", "-i", wav_path]
    for index, out_path in enumerate(channel_paths):
        command += ["-map_channel", f"0.0.{index}", out_path]
    try:
        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed through verbatim.
        subprocess.call(command)
    except OSError as err:
        # Keeps the old behaviour of not raising when ffmpeg cannot be started.
        print(f"could not run ffmpeg: {err}")


def _top_class_scores(scores, top_n):
    """Map each row index of ``scores`` to ``{class_index: rounded_score}``.

    The ``top_n`` classes are chosen once, by mean score over all rows, and
    are listed in descending order of that mean.
    """
    mean_scores = np.mean(scores, axis=0)
    top_class_indices = np.argsort(mean_scores)[::-1][:top_n]
    return {
        row: {
            int(cls): round(float(score), 2)
            for cls, score in zip(top_class_indices, row_scores)
        }
        for row, row_scores in enumerate(scores[:, top_class_indices])
    }


def classifyWav(wavPath, topClasses, channels=2):
    """Classify every channel of a WAV file with YAMNet.

    The file is split into one mono WAV per channel inside a
    ``splitChannels/`` folder located two path components above the file
    name. Existing ``*.wav`` files in that folder are deleted first, and the
    folder is created if missing. Each channel is then scored with a YAMNet
    model built for that channel's sample rate with ``patch_hop_seconds=1``.

    Args:
        wavPath: "/"-separated path of the input WAV file.
        topClasses: Number of classes to keep, ranked by mean score over the
            whole channel.
        channels: Number of channels to split out of the input (default 2,
            the value that was previously hard-coded).

    Returns:
        ``{channel_index: {frame_index: {class_index: score}}}`` with scores
        rounded to two decimals. Channels whose split file was not produced
        are omitted. The class indices are returned rather than names; they
        are matched to names by the consumer of this result.

    Raises:
        ValueError: If ``channels`` is smaller than 1.
    """
    if channels < 1:
        raise ValueError("channels must be at least 1")

    path = wavPath.split("/")
    filename = path[-1].split(".")[0]
    # this is our temp folder we read and write the channels to
    targetFolder = '/'.join(path[:-2]) + "/splitChannels/"

    os.makedirs(targetFolder, exist_ok=True)
    _remove_stale_wavs(targetFolder)

    channel_paths = [
        f"{targetFolder + filename}_ch{index}.wav" for index in range(channels)
    ]
    _split_channels(wavPath, channel_paths)

    semanticResults = {}
    models_by_rate = {}  # one weight-loaded model per sample rate, not per channel

    # Iterate the files we asked ffmpeg to write, so the result key is the real
    # channel number instead of whatever order the directory listing returns.
    for channel, channel_path in enumerate(channel_paths):
        if not os.path.isfile(channel_path):
            continue

        wav_data, sr = sf.read(channel_path, dtype=np.int16)
        # Scale int16 PCM to [-1.0, 1.0); explicit float32 avoids relying on an
        # implicit float64 -> float32 cast inside the model.
        waveform = (wav_data / 32768.0).astype(np.float32)

        model = models_by_rate.get(sr)
        if model is None:
            # The model is configured for the file's own sample rate instead of
            # resampling the audio.
            params = yamnet_params.Params(sample_rate=sr, patch_hop_seconds=1)
            model = yamnet_model.yamnet_frames_model(params)
            model.load_weights(PATH_YAMNET_WEIGHTS)
            models_by_rate[sr] = model

        scores, _, _ = model(waveform)
        semanticResults[channel] = _top_class_scores(scores.numpy(), topClasses)

    print(semanticResults)
    return semanticResults