# Example #1
def run_detector(infile, outdir, classif_model=None, deployment_file=None):
    """Detect, measure, and optionally classify fish sounds in one audio file.

    Results are written as a NetCDF file (<infile name>.nc) in ``outdir``.
    The file is skipped if the output already exists.

    Parameters
    ----------
    infile : str
        Path of the audio file to process.
    outdir : str
        Output folder where the .nc result file is written.
    classif_model : dict, optional
        Classification model bundle with keys 'features', 'model',
        'normalization_mean', 'normalization_std', and 'classes'.
        If None, detections are saved without classification labels.
    deployment_file : str, optional
        Path of the csv file with deployment metadata. If None, no
        deployment metadata is inserted.

    Returns
    -------
    None.
    """
    ## Input parameters ##########################################################

    # Spectrogram parameters (all durations in seconds)
    frame = 0.0625  # 3000
    nfft = 0.0853  # 4096
    step = 0.01  # 5
    fmin = 0
    fmax = 1000
    window_type = 'hann'
    ## ###########################################################################
    outfile = os.path.join(outdir, os.path.split(infile)[1] + '.nc')

    if not os.path.exists(outfile):
        # load audio data (first channel, full file)
        sound = Sound(infile)
        sound.read(channel=0, unit='sec')
        # Calculate spectrogram
        print('Spectrogram')
        spectro = Spectrogram(frame,
                              window_type,
                              nfft,
                              step,
                              sound.waveform_sampling_frequency,
                              unit='sec')
        spectro.compute(sound, dB=True, use_dask=True, dask_chunks=100)
        # Crop unused frequencies
        spectro.crop(frequency_min=fmin, frequency_max=fmax, inplace=True)
        # Denoise
        print('Denoise')
        spectro.denoise(
            'median_equalizer',
            window_duration=3,
            use_dask=True,
            dask_chunks=(50, 50000),
            inplace=True)
        # Detector
        print('Detector')
        # detection times are reported relative to the file start timestamp
        file_timestamp = ecosound.core.tools.filename_to_datetime(infile)[0]
        detector = DetectorFactory('BlobDetector',
                                   kernel_duration=0.1,
                                   kernel_bandwidth=300,
                                   threshold=10,
                                   duration_min=0.05,
                                   bandwidth_min=40)
        detections = detector.run(
            spectro,
            start_time=file_timestamp,
            use_dask=True,
            dask_chunks=(4096, 50000),
            debug=False)
        # Measurements
        print('Measurements')
        spectro_features = MeasurerFactory('SpectrogramFeatures',
                                           resolution_time=0.001,
                                           resolution_freq=0.1,
                                           interp='linear')
        measurements = spectro_features.compute(spectro,
                                                detections,
                                                debug=False,
                                                verbose=False,
                                                use_dask=True)

        # Add deployment metadata
        if deployment_file:
            measurements.insert_metadata(deployment_file)

        # Add file information
        file_name = os.path.splitext(os.path.basename(infile))[0]
        file_dir = os.path.dirname(infile)
        file_ext = os.path.splitext(infile)[1]
        measurements.insert_values(
            operator_name=platform.uname().node,
            audio_file_name=file_name,
            audio_file_dir=file_dir,
            audio_file_extension=file_ext,
            audio_file_start_date=ecosound.core.tools.filename_to_datetime(
                infile)[0])

        # BUG FIX: bind 'data' unconditionally — previously it was only
        # assigned inside the classification branch, so calling with
        # classif_model=None raised NameError at the sort/save step below.
        data = measurements.data

        # Classification
        print('Classification')
        if classif_model:
            features = classif_model['features']
            model = classif_model['model']
            Norm_mean = classif_model['normalization_mean']
            Norm_std = classif_model['normalization_std']
            classes_encoder = classif_model['classes']
            n1 = len(data)
            # drop observations/rows with NaNs or infinities in the features
            data = data.replace([np.inf, -np.inf], np.nan)
            data.dropna(subset=features,
                        axis=0,
                        how='any',
                        thresh=None,
                        inplace=True)
            n2 = len(data)
            print('Deleted observations (due to NaNs): ' + str(n1 - n2))
            # Classification - predictions (features normalized to the
            # training mean/std before prediction)
            X = data[features]
            X = (X - Norm_mean) / Norm_std
            pred_class = model.predict(X)
            pred_prob = model.predict_proba(X)
            # keep only the probability of the predicted class for each row
            pred_prob = pred_prob[range(0, len(pred_class)), pred_class]
            # Relabel numeric class IDs with their string labels
            for index, row in classes_encoder.iterrows():
                pred_class = [
                    row['label'] if i == row['ID'] else i for i in pred_class
                ]
            # update measurements
            data['label_class'] = pred_class
            data['confidence'] = pred_prob

        # sort detections by ascending start date/time
        data.sort_values('time_min_offset',
                         axis=0,
                         ascending=True,
                         inplace=True)
        # save result as NetCDF file
        print('Saving')
        measurements.data = data
        measurements.to_netcdf(outfile)
    else:
        print('Recording already processed.')
# Example #2
def run_detector(infile, channel, config, chunk=None, deployment_file=None):
    """Run the configured detector on one audio file and return detections.

    Parameters
    ----------
    infile : str
        Path of the audio file to process.
    channel : int
        Audio channel to analyze.
    config : dict
        Parameters loaded from the yaml file, with 'SPECTROGRAM',
        'DENOISER', and 'DETECTOR' sections.
    chunk : list of float, optional
        [start, stop] time in seconds of the file section to analyze.
        If None, the whole file is processed.
    deployment_file : str, optional
        Path of the csv file with deployment metadata. If None, no
        deployment metadata is inserted.

    Returns
    -------
    detections : ecosound annotation/measurement object
        Detections with time offsets relative to the file start.
    """
    sound = Sound(infile)
    # load audio data
    if chunk:
        # BUG FIX: previously referenced undefined names t1/t2; use the
        # chunk bounds passed by the caller instead.
        sound.read(channel=channel,
                   chunk=[chunk[0], chunk[1]],
                   unit='sec',
                   detrend=True)
        time_offset_sec = chunk[0]
    else:
        sound.read(channel=channel, detrend=True)
        time_offset_sec = 0

    # Calculate spectrogram
    spectro = Spectrogram(config['SPECTROGRAM']['frame_sec'],
                          config['SPECTROGRAM']['window_type'],
                          config['SPECTROGRAM']['nfft_sec'],
                          config['SPECTROGRAM']['step_sec'],
                          sound.waveform_sampling_frequency,
                          unit='sec',
                          )
    spectro.compute(sound,
                    config['SPECTROGRAM']['dB'],
                    config['SPECTROGRAM']['use_dask'],
                    config['SPECTROGRAM']['dask_chunks'],
                    )
    # Crop unused frequencies
    spectro.crop(frequency_min=config['SPECTROGRAM']['fmin_hz'],
                 frequency_max=config['SPECTROGRAM']['fmax_hz'],
                 inplace=True,
                 )
    # Denoise
    spectro.denoise(config['DENOISER']['denoiser_name'],
                    window_duration=config['DENOISER']['window_duration_sec'],
                    use_dask=config['DENOISER']['use_dask'],
                    dask_chunks=tuple(config['DENOISER']['dask_chunks']),
                    inplace=True)
    # Detector
    file_timestamp = ecosound.core.tools.filename_to_datetime(
        sound.file_full_path)[0]
    detector = DetectorFactory(config['DETECTOR']['detector_name'],
                               kernel_duration=config['DETECTOR']['kernel_duration_sec'],
                               kernel_bandwidth=config['DETECTOR']['kernel_bandwidth_hz'],
                               threshold=config['DETECTOR']['threshold'],
                               duration_min=config['DETECTOR']['duration_min_sec'],
                               bandwidth_min=config['DETECTOR']['bandwidth_min_hz']
                               )
    # shift absolute start time when only a section of the file is analyzed
    start_time = file_timestamp + datetime.timedelta(seconds=time_offset_sec)
    detections = detector.run(spectro,
                              start_time=start_time,
                              use_dask=config['DETECTOR']['use_dask'],
                              dask_chunks=tuple(config['DETECTOR']['dask_chunks']),
                              debug=False,
                              )
    # add time offset if only a section of the recording was analyzed
    detections.data['time_min_offset'] = detections.data['time_min_offset'] + time_offset_sec
    detections.data['time_max_offset'] = detections.data['time_max_offset'] + time_offset_sec

    # add deployment metadata
    # BUG FIX: guard against the default deployment_file=None, which would
    # previously be passed straight to insert_metadata.
    if deployment_file:
        detections.insert_metadata(deployment_file, channel=channel)

    # Add file information
    file_name = os.path.splitext(os.path.basename(sound.file_full_path))[0]
    file_dir = os.path.dirname(sound.file_full_path)
    file_ext = os.path.splitext(sound.file_full_path)[1]
    detections.insert_values(operator_name=platform.uname().node,
                             audio_file_name=file_name,
                             audio_file_dir=file_dir,
                             audio_file_extension=file_ext,
                             audio_file_start_date=file_timestamp,
                             )

    return detections
# Example #3
# Build the spectrogram of the loaded waveform (sample-based units).
spectro = Spectrogram(
    frame,
    window_type,
    nfft,
    step,
    sound.waveform_sampling_frequency,
    unit='samp')
spectro.compute(sound, dB=True, use_dask=True, dask_chunks=40)

# Keep only the frequency band of interest.
spectro.crop(frequency_min=fmin, frequency_max=fmax, inplace=True)

# Suppress background noise with a median equalizer; the same dask chunk
# shape is reused for the denoiser and the detector below.
chunk_shape = (2048, 1000)
spectro.denoise(
    'median_equalizer',
    window_duration=3,
    use_dask=True,
    dask_chunks=chunk_shape,
    inplace=True)

# Run the blob detector over the denoised spectrogram.
detector = DetectorFactory(
    'BlobDetector',
    kernel_duration=0.1,
    kernel_bandwidth=300,
    threshold=10,
    duration_min=0.05,
    bandwidth_min=40)
detections = detector.run(
    spectro,
    use_dask=True,
    dask_chunks=chunk_shape,
    debug=False)
# Example #4
def run_detector(infile, outdir, classif_model, config, deployment_file=None, extension=".wav", overwrite=False, netcdf=True, pamlab=False, raven=False):
    """
    Run the fish sound detector.

        Parameters
        ----------
        infile : str
            Path of the audio file to process.
        outdir : str
            Path of the output folder where the results will be written.
        classif_model : dict
            Classification model bundle (features, model, normalization
            stats, classes). If falsy, detections are saved unclassified.
        config : dict
            Dict with all parameters from the yaml file.
        deployment_file : str, optional
            Path and name of the csv file with all the deployment information.
            The default is None.
        extension : str, optional
            Extension of the audio files to process. The default is ".wav".
            NOTE(review): not used inside this function — kept for interface
            compatibility with callers.
        overwrite : bool, optional
            If set to True, overwrites results (i.e. netcdf files) even if they
            already exist in the outdir folder. The default is False.
        netcdf : bool, optional
            If set to True, saves results as netcdf4 files (.nc).
            The default is True.
        pamlab : bool, optional
            If set to True, saves results as PAMlab files (.log).
            The default is False.
        raven : bool, optional
            If set to True, saves results as Raven files (.txt).
            The default is False.

        Returns
        -------
        None.

    """
    outfile = os.path.join(outdir, os.path.split(infile)[1] + '.nc')
    # (not exists) or (exists and overwrite) simplifies to the check below
    if overwrite or not os.path.exists(outfile):
        # load audio data
        sound = Sound(infile)
        sound.read(channel=config['AUDIO']['channel'], unit='sec')
        # Calculate spectrogram
        print('Spectrogram')
        spectro = Spectrogram(config['SPECTROGRAM']['frame_sec'],
                              config['SPECTROGRAM']['window_type'],
                              config['SPECTROGRAM']['nfft_sec'],
                              config['SPECTROGRAM']['step_sec'],
                              sound.waveform_sampling_frequency,
                              unit='sec',
                              )
        spectro.compute(sound,
                        config['SPECTROGRAM']['dB'],
                        config['SPECTROGRAM']['use_dask'],
                        config['SPECTROGRAM']['dask_chunks'],
                        )
        # Crop unused frequencies
        spectro.crop(frequency_min=config['SPECTROGRAM']['fmin_hz'],
                     frequency_max=config['SPECTROGRAM']['fmax_hz'],
                     inplace=True,
                     )
        # Denoise
        print('Denoise')
        spectro.denoise(config['DENOISER']['denoiser_name'],
                        window_duration=config['DENOISER']['window_duration_sec'],
                        use_dask=config['DENOISER']['use_dask'],
                        dask_chunks=tuple(config['DENOISER']['dask_chunks']),
                        inplace=True)
        # Detector
        print('Detector')
        # detection times are reported relative to the file start timestamp
        file_timestamp = ecosound.core.tools.filename_to_datetime(infile)[0]
        detector = DetectorFactory(config['DETECTOR']['detector_name'],
                                   kernel_duration=config['DETECTOR']['kernel_duration_sec'],
                                   kernel_bandwidth=config['DETECTOR']['kernel_bandwidth_hz'],
                                   threshold=config['DETECTOR']['threshold'],
                                   duration_min=config['DETECTOR']['duration_min_sec'],
                                   bandwidth_min=config['DETECTOR']['bandwidth_min_hz']
                                   )
        detections = detector.run(spectro,
                                  start_time=file_timestamp,
                                  use_dask=config['DETECTOR']['use_dask'],
                                  dask_chunks=tuple(config['DETECTOR']['dask_chunks']),
                                  debug=False,
                                  )
        # Measurements
        print('Measurements')
        spectro_features = MeasurerFactory(config['MEASURER']['measurer_name'],
                                           resolution_time=config['MEASURER']['resolution_time_sec'],
                                           resolution_freq=config['MEASURER']['resolution_freq_hz'],
                                           interp=config['MEASURER']['interp'],
                                           )
        measurements = spectro_features.compute(spectro,
                                                detections,
                                                debug=False,
                                                verbose=False,
                                                use_dask=config['MEASURER']['use_dask'])

        # Add deployment metadata, or placeholder values when none provided
        if deployment_file:
            measurements.insert_metadata(deployment_file)
        else:
            measurements.insert_values(audio_channel=0,
                                       UTC_offset=0,
                                       audio_sampling_frequency=0,
                                       audio_bit_depth=0,
                                       mooring_platform_name='',
                                       recorder_type='',
                                       recorder_SN='',
                                       hydrophone_model='',
                                       hydrophone_SN='',
                                       hydrophone_depth=0,
                                       location_name='',
                                       location_lat=0,
                                       location_lon=0,
                                       location_water_depth=0,
                                       deployment_ID='',
                                       )

        # Add file information
        file_name = os.path.splitext(os.path.basename(infile))[0]
        file_dir = os.path.dirname(infile)
        file_ext = os.path.splitext(infile)[1]
        measurements.insert_values(operator_name=platform.uname().node,
                                   audio_file_name=file_name,
                                   audio_file_dir=file_dir,
                                   audio_file_extension=file_ext,
                                   audio_file_start_date=ecosound.core.tools.filename_to_datetime(infile)[0]
                                   )

        # BUG FIX: bind 'data' unconditionally — previously it was only
        # assigned inside the classification branch, so a falsy
        # classif_model raised NameError at the sort/save step below.
        data = measurements.data

        # Classification
        print('Classification')
        if classif_model:
            features = classif_model['features']
            model = classif_model['model']
            Norm_mean = classif_model['normalization_mean']
            Norm_std = classif_model['normalization_std']
            classes_encoder = classif_model['classes']
            n1 = len(data)
            # drop observations/rows with NaNs or infinities in the features
            data = data.replace([np.inf, -np.inf], np.nan)
            data.dropna(subset=features,
                        axis=0,
                        how='any',
                        thresh=None,
                        inplace=True)
            n2 = len(data)
            print('Deleted observations (due to NaNs): ' + str(n1-n2))
            # Classification - predictions (features normalized to the
            # training mean/std before prediction)
            X = data[features]
            X = (X-Norm_mean)/Norm_std
            pred_class = model.predict(X)
            pred_prob = model.predict_proba(X)
            # keep only the probability of the predicted class for each row
            pred_prob = pred_prob[range(0, len(pred_class)), pred_class]
            # Relabel numeric class IDs with their string labels
            for index, row in classes_encoder.iterrows():
                pred_class = [row['label'] if i == row['ID'] else i for i in pred_class]
            # update measurements
            data['label_class'] = pred_class
            data['confidence'] = pred_prob
        # sort detections by ascending start date/time
        data.sort_values('time_min_offset',
                         axis=0,
                         ascending=True,
                         inplace=True)
        # save results in the requested formats
        print('Saving')
        measurements.data = data
        if netcdf:
            measurements.to_netcdf(outfile)
        if pamlab:
            measurements.to_pamlab(outdir)
        if raven:
            measurements.to_raven(outdir)

    else:
        print('Recording already processed.')
        logging.info('Recording already processed.')