Ejemplo n.º 1
0
def load_model(config):
    """Build a YAMNet frames model, load its weights and read its class names.

    Args:
        config: Passed unchanged to ``load_weights``; its structure is not
            visible from this function.

    Returns:
        A ``(model, class_names, params)`` tuple: the model returned by
        ``yamnet_model.yamnet_frames_model`` after ``load_weights`` has been
        called on it, the value returned by ``yamnet_model.class_names``, and
        the ``yamnet_params.Params`` instance the model was built with.
    """
    logger.debug("Loading model...")

    weights_source = load_weights(config)

    hyper_params = yamnet_params.Params()
    model = yamnet_model.yamnet_frames_model(hyper_params)
    model.load_weights(weights_source)

    # The class map CSV is looked up in a "yamnet" directory next to this file.
    module_dir = os.path.dirname(__file__)
    class_map_csv = os.path.join(module_dir, "yamnet", "yamnet_class_map.csv")
    labels = yamnet_model.class_names(class_map_csv)

    return model, labels, hyper_params
Ejemplo n.º 2
0
def infer(frm_rcv):
    """Run YAMNet on received audio and publish the results over RabbitMQ.

    Each item obtained from ``frm_rcv.recv()`` is unpacked as
    ``(aud_time, audio)``. Audio with more than one dimension is averaged
    over axis 1 before prediction. For the last row of the predicted scores,
    the classes are ranked by descending score and a JSON message is
    published to the ``inference`` fanout exchange on ``localhost``.

    The loop ends when ``recv()`` raises ``EOFError`` (for a
    ``multiprocessing`` connection this means the sending end was closed).
    Any other exception raised while handling an item is logged and the loop
    moves on to the next item. The broker connection is closed on every exit
    path.

    Args:
        frm_rcv: Object with a blocking ``recv()`` method returning a
            ``(timestamp, audio_array)`` pair -- presumably the receiving
            end of a ``multiprocessing.Pipe``; confirm against the caller.

    Raises:
        SystemExit: If constructing the YAMNet model fails.
    """
    logger.info('infering ')
    # Imported lazily, as in the original; presumably so the heavy model
    # dependencies are only loaded in the process that runs inference.
    from yamnet import yamnet as yamnet_model
    from yamnet import params
    import json

    top_k = 521  # report the top k classes

    connection = pika.BlockingConnection(
        pika.ConnectionParameters('localhost'))
    try:
        channel = connection.channel()
        channel.exchange_declare(exchange='inference', exchange_type='fanout')

        logger.info('model ')
        try:
            yamnet = yamnet_model.yamnet_frames_model(params.Params())
        except Exception:
            logger.exception('UGGGG')
            sys.exit(1)
        yamnet.load_weights('/opt/soundscene/yamnet.h5')
        logger.info('done model ')

        while True:
            try:
                aud_time, normalized_audio_1hz = frm_rcv.recv()

                # Collapse multi-dimensional audio by averaging over axis 1
                # (presumably the channel axis -- confirm with the sender).
                if len(normalized_audio_1hz.shape) > 1:
                    normalized_audio_1hz = np.mean(normalized_audio_1hz, axis=1)

                scores, emb, mel = yamnet.predict(normalized_audio_1hz, steps=1)

                # Only the last row of scores is published.
                for frame_scores in scores[-1:]:
                    top_idxs = np.argsort(frame_scores)[::-1][:top_k]
                    inferences = frame_scores[top_idxs]

                    channel.basic_publish(
                        exchange='inference',
                        routing_key='',
                        body=json.dumps(
                            dict(
                                time=aud_time,
                                inferences=inferences.tolist(),
                                mel=mel.tolist(),
                                embeddings=[],  # no embeddings produced for yamnet
                                idxs=top_idxs.tolist())))
            except EOFError:
                # Nothing more will ever arrive; without this the catch-all
                # below would log the same error in a tight endless loop.
                logger.info('receiver closed, stopping inference')
                break
            except Exception as e:
                # Best effort: one bad item must not stop the service.
                logger.exception(e)
    finally:
        # Release the broker connection on every exit path.
        if connection.is_open:
            connection.close()
Ejemplo n.º 3
0
 def waveform_to_features(waveform):
   """Compute feature patches with the YAMNet front end using VGGish settings.

   A ``yamnet_params.Params`` object is built from the ``vggish_params``
   constants and handed, together with ``waveform``, to
   ``yamnet_features.waveform_to_log_mel_spectrogram_patches``. Only the
   second element of the returned pair is returned to the caller.
   """
   vggish_settings = dict(
       sample_rate=vggish_params.SAMPLE_RATE,
       stft_window_seconds=vggish_params.STFT_WINDOW_LENGTH_SECONDS,
       stft_hop_seconds=vggish_params.STFT_HOP_LENGTH_SECONDS,
       mel_bands=vggish_params.NUM_MEL_BINS,
       mel_min_hz=vggish_params.MEL_MIN_HZ,
       mel_max_hz=vggish_params.MEL_MAX_HZ,
       log_offset=vggish_params.LOG_OFFSET,
       patch_window_seconds=vggish_params.EXAMPLE_WINDOW_SECONDS,
       patch_hop_seconds=vggish_params.EXAMPLE_HOP_SECONDS,
   )
   params = yamnet_params.Params(**vggish_settings)

   # The first element (the log-mel spectrogram) is not needed here.
   _, patches = yamnet_features.waveform_to_log_mel_spectrogram_patches(
       waveform, params)
   return patches
Ejemplo n.º 4
0
def classifyWav(wavPath, topClasses, channels=2):
    """Split a WAV file into mono channels and score each one with YAMNet.

    The input is split with ``ffmpeg -map_channel`` into one file per channel
    inside a ``splitChannels`` folder located next to the parent directory of
    ``wavPath``. Every ``*.wav`` file already in that folder is deleted
    first. Each channel file is then run through YAMNet and the
    ``topClasses`` classes with the highest mean score are reported per
    frame.

    Args:
        wavPath: "/"-separated path of the WAV file to analyse.
        topClasses: Number of highest-scoring classes to keep per channel.
        channels: Number of channels to extract. Defaults to 2, the value
            that was previously hard-coded.

    Returns:
        ``{channel_index: {frame_index: {class_index: score}}}`` with scores
        rounded to two decimals. Channels whose split file was not produced
        (for example because ffmpeg failed) are absent from the result.
    """
    import glob  # local import: only this function needs it

    semanticResults = {}
    path = wavPath.split("/")
    filename = path[-1].split(".")[0]

    # this is our temp folder we read and write the channels to
    targetFolder = '/'.join(path[:-2]) + "/splitChannels/"
    os.makedirs(targetFolder, exist_ok=True)

    # Delete earlier split files without going through a shell, so paths
    # containing spaces, quotes or metacharacters are handled literally.
    for stale_wav in glob.glob(glob.escape(targetFolder) + "*.wav"):
        try:
            os.remove(stale_wav)
        except OSError:
            # Best-effort cleanup, matching the original ignored `rm` result.
            pass

    # One output file per channel, in channel order.
    channel_files = [
        f"{targetFolder}{filename}_ch{ch}.wav" for ch in range(channels)
    ]
    command = ["ffmpeg", "-i", wavPath]
    for ch, out_path in enumerate(channel_files):
        command += ["-map_channel", f"0.0.{ch}", out_path]
    # Argument list + no shell: every path reaches ffmpeg as a single argument.
    subprocess.call(command)

    # Models are cached per sample rate so the network is not rebuilt and
    # the weights are not reloaded for every channel file.
    models = {}

    # Iterate the known output files rather than a directory listing, whose
    # order is arbitrary and which may contain unrelated files; this way the
    # result key is always the real channel index.
    for ch, wav_file in enumerate(channel_files):
        if not os.path.isfile(wav_file):
            continue

        # the results of the current channel
        chResults = {}

        wav_data, sr = sf.read(wav_file, dtype=np.int16)
        # Scale int16 samples by 1/32768.
        waveform = wav_data / 32768.0

        if sr not in models:
            # patch_hop_seconds=1 is passed so that, presumably, one score
            # row is produced per second of audio -- confirm against the
            # YAMNet parameter definitions.
            params = yamnet_params.Params(sample_rate=sr, patch_hop_seconds=1)
            model = yamnet_model.yamnet_frames_model(params)
            model.load_weights(PATH_YAMNET_WEIGHTS)
            models[sr] = model
        yamnet = models[sr]

        # Run the model.
        scores, embeddings, _ = yamnet(waveform)
        scores = scores.numpy()
        mean_scores = np.mean(scores, axis=0)

        # Keep the `topClasses` classes with the highest mean score.
        top_class_indices = np.argsort(mean_scores)[::-1][:topClasses]

        # Class indices (not names) are reported; the front end maps them
        # back to class names.
        for frame_idx, frame_scores in enumerate(scores[:, top_class_indices]):
            chResults[frame_idx] = {
                int(cln): round(float(prct), 2)
                for cln, prct in zip(top_class_indices, frame_scores)
            }
        semanticResults[ch] = chResults

        print(semanticResults)

    return semanticResults