Ejemplo n.º 1
0
    def extract_features_from_audio(self, path):
        """Run YAMNet over every file in ``path`` and collect its outputs.

        Each directory entry is loaded with librosa at ``sr=16000``, cast to
        float32 and passed to a YAMNet frames model whose weights come from
        ``self.yamnet_weights``. For every embedding the model returns, the
        file's base name (extension stripped) is recorded alongside it; the
        model's scores are collected separately.

        Files that cannot be loaded or scored are logged (with traceback)
        and skipped; they contribute nothing to any of the output arrays.

        Args:
            path: Directory whose entries are treated as audio files.

        Returns:
            The tuple ``(self.samples, self.features)``. ``self.samples``,
            ``self.features`` and ``self.classes`` are (re)assigned as NumPy
            arrays as a side effect.
        """
        samples, features, classes = [], [], []
        model = yamnet_frames_model(Params())
        model.load_weights(self.yamnet_weights)
        for sound in tqdm(os.listdir(path)):
            basename = os.path.splitext(sound)[0]

            try:
                wav = librosa.load(os.path.join(path, sound),
                                   sr=16000)[0].astype(np.float32)

                scores, embeddings, _ = model(wav)
                # Materialise both outputs before touching the shared lists,
                # so a failure here cannot leave them partially updated.
                file_features = list(embeddings)
                file_classes = list(scores)
            except Exception:
                # `Exception` rather than a bare except, so Ctrl-C and
                # SystemExit still stop a long batch run.
                logging.exception('Unable to process file: {0}'.format(sound))
                continue

            samples.extend([basename] * len(file_features))
            features.extend(file_features)
            classes.extend(file_classes)

        self.samples = np.asarray(samples)
        self.features = np.asarray(features)
        self.classes = np.asarray(classes)

        return self.samples, self.features
Ejemplo n.º 2
0
def load_model(config):
    """Build a YAMNet frames model with weights loaded.

    The weights are obtained from ``load_weights(config)``. Class names are
    read from ``yamnet/yamnet_class_map.csv`` located next to this module.

    Args:
        config: Passed through unchanged to ``load_weights``.

    Returns:
        A tuple ``(model, class_names, params)``.
    """
    logger.debug("Loading model...")

    weights_source = load_weights(config)

    model_params = yamnet_params.Params()
    model = yamnet_model.yamnet_frames_model(model_params)
    model.load_weights(weights_source)

    # The class map ships alongside this module, inside the yamnet package dir.
    module_dir = os.path.dirname(__file__)
    class_map_path = os.path.join(module_dir, "yamnet", "yamnet_class_map.csv")
    class_names = yamnet_model.class_names(class_map_path)

    return model, class_names, model_params
Ejemplo n.º 3
0
def infer(frm_rcv):
    """Score incoming audio with YAMNet and publish the results over AMQP.

    Opens a blocking pika connection to ``localhost``, declares the fanout
    exchange ``inference``, builds a YAMNet frames model and loads weights
    from ``/opt/soundscene/yamnet.h5``. It then loops: receive
    ``(aud_time, audio)`` from ``frm_rcv``, average multi-column audio down
    to one column, run the model, and publish a JSON message for the last
    row of scores.

    Errors while handling one message are logged and the loop continues.
    The loop ends (and the function returns) when ``frm_rcv.recv()`` raises
    ``EOFError``; the AMQP connection is closed on the way out.

    Args:
        frm_rcv: Object with a ``recv()`` method returning
            ``(aud_time, audio_array)``. NOTE(review): presumably a
            ``multiprocessing`` connection, whose ``recv()`` raises
            ``EOFError`` once the sending end is closed -- confirm.

    Raises:
        SystemExit: With status 1 if the model cannot be constructed.
    """
    logger.info('infering ')
    from yamnet import yamnet as yamnet_model
    from yamnet import params
    import json

    top_k = 521  # report the top k classes
    connection = pika.BlockingConnection(
        pika.ConnectionParameters('localhost'))
    try:
        channel = connection.channel()
        channel.exchange_declare(exchange='inference', exchange_type='fanout')

        logger.info('model ')
        try:
            yamnet = yamnet_model.yamnet_frames_model(params.Params())
        except Exception:
            logger.exception('UGGGG')
            sys.exit(1)
        yamnet.load_weights('/opt/soundscene/yamnet.h5')
        logger.info('done model ')

        while True:
            try:
                aud_time, normalized_audio_1hz = frm_rcv.recv()

                # Collapse multi-column input to a single column by averaging.
                if len(normalized_audio_1hz.shape) > 1:
                    normalized_audio_1hz = np.mean(normalized_audio_1hz, axis=1)

                # returns [1,classes] classes=521
                scores, _, mel = yamnet.predict(normalized_audio_1hz, steps=1)

                # Only the last row of scores is published.
                for _n in scores[-1:]:  # 1 sec samples
                    top_idxs = np.argsort(_n)[::-1][:top_k]
                    inferences = _n[top_idxs]

                    channel.basic_publish(
                        exchange='inference',
                        routing_key='',
                        body=json.dumps(
                            dict(
                                time=aud_time,
                                inferences=inferences.tolist(),
                                mel=mel.tolist(),
                                embeddings=[],  # no embeddings produced for yamnet
                                idxs=top_idxs.tolist())))
            except EOFError:
                # Without this the generic handler below would log the same
                # error forever once the sender is gone.
                logger.info('audio source closed, stopping inference')
                break
            except Exception as e:
                # Best effort: one bad frame must not stop the service.
                logger.exception(e)
    finally:
        if connection.is_open:
            connection.close()
Ejemplo n.º 4
0
import argparse
import imutils
import time
import dlib
import cv2
import sys
#sound packages
import pyaudio
import librosa
import numpy as np
import matplotlib.pyplot as plt
import keras

import yamnet.params as params
import yamnet.yamnet as yamnet_model
# Build the YAMNet frames model, passing the `params` module itself as the
# parameter object, then load weights and class names from the local
# `yamnet/` directory (paths are relative to the working directory).
yamnet = yamnet_model.yamnet_frames_model(params)
yamnet.load_weights('yamnet/yamnet.h5')
yamnet_classes = yamnet_model.class_names('yamnet/yamnet_class_map.csv')

# More cascade files: https://github.com/Itseez/opencv/tree/master/data/haarcascades

# Face cascade; the XML file is read from the working directory. Source:
# https://github.com/Itseez/opencv/blob/master/data/haarcascades/haarcascade_frontalface_default.xml
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
# Eye cascade; the XML file is read from the working directory. Source:
# https://github.com/Itseez/opencv/blob/master/data/haarcascades/haarcascade_eye.xml
eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')

# Open the video capture device at index 0.
cap = cv2.VideoCapture(0)

# Number of audio samples in one second at the model's sample rate.
frame_len = int(params.SAMPLE_RATE * 1)  # 1sec

# PyAudio handle for the audio side of the script.
p = pyaudio.PyAudio()
Ejemplo n.º 5
0
def classifyWav(wavPath, topClasses, channels=2):
    """Split a WAV file into mono channels and score each one with YAMNet.

    The input is split with ``ffmpeg -map_channel`` into
    ``<grandparent-of-wavPath>/splitChannels/<name>_ch<N>.wav``; any ``.wav``
    files already in that folder are removed first. Each channel file is
    then read as int16, scaled by 1/32768, and run through a YAMNet frames
    model built with ``patch_hop_seconds=1`` at the file's own sample rate.

    Args:
        wavPath: "/"-separated path to the multi-channel WAV file.
        topClasses: How many of the highest mean-scoring classes to report
            per channel.
        channels: Number of channels to split out. Defaults to 2, the value
            that was previously hard-coded.

    Returns:
        ``{channel: {frame: {class_index: score}}}`` where ``channel`` is the
        0-based channel number, ``frame`` is the 0-based score row index,
        ``class_index`` is an ``int`` index into the YAMNet class list and
        ``score`` is rounded to two decimals. Channels whose split file was
        not produced are omitted.

    Raises:
        OSError: If ``ffmpeg`` cannot be launched.
    """
    import glob

    semanticResults = {}
    path = wavPath.split("/")
    filename = path[-1].split(".")[0]

    # this is our temp folder we read and write the channels to
    targetFolder = '/'.join(path[:-2]) + "/splitChannels/"

    # Clear out channel files from a previous run. Deliberately best effort:
    # a file that cannot be removed must not abort the classification.
    for stale in glob.glob(glob.escape(targetFolder) + "*.wav"):
        try:
            os.remove(stale)
        except OSError:
            pass

    channel_paths = [
        f"{targetFolder}{filename}_ch{ch}.wav" for ch in range(channels)
    ]

    if channel_paths:
        # Argument list instead of a shell string: paths containing spaces or
        # quotes are passed through intact and cannot inject shell commands.
        command = ["ffmpeg", "-i", wavPath]
        for ch, out_path in enumerate(channel_paths):
            command += ["-map_channel", f"0.0.{ch}", out_path]
        subprocess.call(command)

    # One model per distinct sample rate, so weights are not reloaded for
    # every channel of the same recording.
    models = {}

    # Walk the expected outputs in channel order rather than os.scandir(),
    # whose ordering is arbitrary and would scramble the channel keys.
    for ch, channel_path in enumerate(channel_paths):
        if not os.path.isfile(channel_path):
            continue

        # the results of the current channel
        chResults = {}

        wav_data, sr = sf.read(channel_path, dtype=np.int16)
        waveform = wav_data / 32768.0

        # The graph is designed for a sampling rate of 16 kHz, but higher
        # rates should work too. patch_hop_seconds=1 is requested here;
        # presumably that yields one score row per second -- confirm.
        yamnet = models.get(sr)
        if yamnet is None:
            params = yamnet_params.Params(sample_rate=sr, patch_hop_seconds=1)
            yamnet = yamnet_model.yamnet_frames_model(params)
            yamnet.load_weights(PATH_YAMNET_WEIGHTS)
            models[sr] = yamnet

        # Run the model.
        scores, _, _ = yamnet(waveform)
        scores = scores.numpy()
        mean_scores = np.mean(scores, axis=0)

        # Keep the classes with the highest mean score over the whole file.
        top_class_indices = np.argsort(mean_scores)[::-1][:topClasses]

        # these are our scores rows = classes , cols = seconds
        top_scores = scores[:, top_class_indices].T

        # Class indices (not names) are reported; the front end maps them
        # back to labels.
        for col in range(np.shape(top_scores)[-1]):
            curr_col = top_scores[:, col].flatten()
            chResults[col] = {
                int(cln): round(float(prct), 2)
                for cln, prct in zip(top_class_indices, curr_col)
            }
        semanticResults[ch] = chResults

        print(semanticResults)

    return semanticResults