Example 1
def add_tone(audio,
             start_time,
             duration,
             amplitude,
             frequency,
             channel_num=0,
             taper_duration=0):

    fs = audio.sample_rate

    # Create tone.
    length = signal_utils.seconds_to_frames(duration, fs)
    phases = np.arange(length) * 2 * np.pi * frequency / fs
    tone = amplitude * np.sin(phases)

    # Taper ends if specified.
    if taper_duration != 0:
        n = signal_utils.seconds_to_frames(taper_duration, fs)
        ramp = np.arange(n) / n
        tone[:n] *= ramp
        tone[-n:] *= 1 - ramp

    # Add tone to audio.
    start_index = signal_utils.seconds_to_frames(start_time, fs)
    audio.samples[channel_num, start_index:start_index + length] += tone
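Every example on this page calls `signal_utils.seconds_to_frames` to convert a duration in seconds into a frame count at a given sample rate. Its definition is not shown here; a minimal sketch, assuming it simply rounds the product to the nearest integer:

def seconds_to_frames(seconds, sample_rate):
    # Hypothetical sketch: a duration in seconds times the sample
    # rate gives a frame count, rounded to the nearest integer.
    return int(round(seconds * sample_rate))

Under that assumption, `add_tone` above could be exercised like this (the `audio` object, with `sample_rate` and `samples` attributes, is hypothetical):

# Add a tapered 1 kHz tone, half a second long, starting at 2 seconds.
add_tone(audio, start_time=2, duration=.5, amplitude=.1,
         frequency=1000, taper_duration=.01)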
Example 2
    def __init__(self,
                 file_path,
                 sample_rate,
                 score_scale_factor,
                 score_repetition_factor,
                 output_start_offset=0,
                 output_duration=None):

        self._sample_rate = sample_rate
        self._score_scale_factor = score_scale_factor
        self._score_repetition_factor = score_repetition_factor

        self._output_start_index = signal_utils.seconds_to_frames(
            output_start_offset, sample_rate)

        if output_duration is None:
            self._output_end_index = None
        else:
            max_file_length = signal_utils.seconds_to_frames(
                output_duration, sample_rate)
            self._output_end_index = self._output_start_index + max_file_length

        # Open wave file.
        self._writer = wave.open(file_path, 'wb')
        self._writer.setparams((2, 2, sample_rate, 0, 'NONE', None))

        self._samples_start_index = 0
Example 3
def get_partition_read_interval(night_interval, first_input_file_start):

    offset = (night_interval.start - first_input_file_start).total_seconds()
    start_index = signal_utils.seconds_to_frames(offset, INPUT_SAMPLE_RATE)

    duration = (night_interval.end - night_interval.start).total_seconds()
    length = signal_utils.seconds_to_frames(duration, INPUT_SAMPLE_RATE)

    return Interval(start_index, start_index + length)
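`Interval` is not defined in these snippets. Given the positional construction here and the keyword construction in Example 4, a minimal sketch as a named tuple:

import collections

# Hypothetical definition, consistent with both Interval(a, b) and
# Interval(start=a, end=b) as used in Examples 3 and 4.
Interval = collections.namedtuple('Interval', ('start', 'end'))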
Example 4
def _get_index_interval(time_interval, start_time, sample_rate):
    """
    Gets the audio file index interval corresponding to the specified
    time interval.
    """

    start_offset = (time_interval.start - start_time).total_seconds()
    start_index = signal_utils.seconds_to_frames(start_offset, sample_rate)

    duration = (time_interval.end - time_interval.start).total_seconds()
    length = signal_utils.seconds_to_frames(duration, sample_rate)

    return Interval(start=start_index, end=start_index + length)
Example 6
def _get_segment_source(clip, segment_source, source_duration):
    
    source = segment_source
    clip_length = clip.length
        
    if source == SEGMENT_SOURCE_CLIP:
        return (0, clip_length)
        
    elif source == SEGMENT_SOURCE_CLIP_CENTER:
        
        sample_rate = clip.sample_rate
        source_length = signal_utils.seconds_to_frames(
            source_duration, sample_rate)
        
        if source_length >= clip_length:
            return (0, clip_length)
        
        else:
            source_start_index = int((clip_length - source_length) // 2)
            return (source_start_index, source_length)
            
    elif source == SEGMENT_SOURCE_SELECTION:
        return clip.selection
    
    else:
        raise ValueError(
            'Unrecognized clip segment source "{}".'.format(source))
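The `SEGMENT_SOURCE_*` constants are defined elsewhere; judging from the error message they are probably strings. Hypothetical values, for illustration only:

# Hypothetical constants; the actual values live in the defining module.
SEGMENT_SOURCE_CLIP = 'Clip'
SEGMENT_SOURCE_CLIP_CENTER = 'Clip Center'
SEGMENT_SOURCE_SELECTION = 'Selection'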
Example 7
    def _process_detector_output(self, output_file_path):

        with open(output_file_path) as output_file:

            reader = csv.reader(output_file)

            # Skip header.
            next(reader)

            for row in reader:

                # Get clip start index from peak time.
                peak_time = self._parse_time(row[0])
                peak_index = signal_utils.seconds_to_frames(
                    peak_time, self._input_sample_rate)
                start_index = peak_index - self._clip_length // 2

                annotations = {}

                # Get detector score.
                annotations['Detector Score'] = float(row[2])

                # Get classification.
                classification = row[1]
                if classification != 'OTHE':
                    annotations['Classification'] = 'Call.' + classification

#                 print(
#                     'processing clip', peak_time, start_index, score,
#                     classification)

                self._listener.process_clip(start_index,
                                            self._clip_length,
                                            annotations=annotations)
Example 8
    def _process_timestamps(self, timestamp_file_path):

        with open(timestamp_file_path) as timestamp_file:

            reader = csv.reader(timestamp_file)

            # Skip header.
            next(reader)

            for row in reader:

                peak_time = float(row[1])

                # Get clip start index from peak time.
                peak_index = signal_utils.seconds_to_frames(
                    peak_time, self._input_sample_rate)
                start_index = peak_index - self._clip_length // 2

                score = float(row[2])
                annotations = {'Detector Score': score}

                # print('processing clip', peak_time, start_index, score)

                self._listener.process_clip(
                    start_index, self._clip_length, annotations=annotations)
Example 9
def extract_call(audio, selection, config):
    
    samples = audio.samples
    sample_rate = audio.sample_rate

    start_index, end_index = selection
    center_index = (start_index + end_index - 1) // 2
    
    duration = config.call_segment_duration
    length = seconds_to_frames(duration, sample_rate)
    start_index = center_index - length // 2
    
    if start_index < 0:
        return None
    
    else:
        # start index is at least zero
        
        end_index = start_index + length
        
        if end_index > len(samples):
            return None
        
        else:
            return Bunch(
                samples=samples[start_index:end_index],
                sample_rate=sample_rate)
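`Bunch` appears in several examples as a lightweight result container. A minimal sketch, assuming it is a plain attribute bag:

class Bunch:

    # Hypothetical sketch: store keyword arguments as attributes, so
    # callers can write result.samples and result.sample_rate.
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)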
Example 11
    def __init__(self, settings, input_sample_rate, listener):

        open_mp_utils.work_around_multiple_copies_issue()

        # Suppress TensorFlow INFO and DEBUG log messages.
        tf.logging.set_verbosity(tf.logging.WARN)

        self._settings = settings
        self._input_sample_rate = input_sample_rate
        self._listener = listener

        self._clip_length = signal_utils.seconds_to_frames(
            _CLIP_DURATION, self._input_sample_rate)

        # Create and open temporary wave file. Do not delete
        # automatically on close. We will close the file after we
        # finish writing it, and then BirdVoxDetect will open it
        # again for reading. We delete the file ourselves after
        # BirdVoxDetect finishes processing it.
        self._audio_file = tempfile.NamedTemporaryFile(
            suffix='.wav', delete=False)

        # Create wave file writer, through which we will write to the
        # wave file.
        self._audio_file_writer = WaveFileWriter(
            self._audio_file, 1, self._input_sample_rate)
Example 13
def _get_segment_source(clip, segment_source, source_duration):

    source = segment_source
    clip_length = clip.length

    if source == SEGMENT_SOURCE_CLIP:
        return (0, clip_length)

    elif source == SEGMENT_SOURCE_CLIP_CENTER:

        sample_rate = clip.sample_rate
        source_length = signal_utils.seconds_to_frames(source_duration,
                                                       sample_rate)

        if source_length >= clip_length:
            return (0, clip_length)

        else:
            source_start_index = int((clip_length - source_length) // 2)
            return (source_start_index, source_length)

    elif source == SEGMENT_SOURCE_SELECTION:
        return clip.selection

    else:
        raise ValueError(
            'Unrecognized clip segment source "{}".'.format(source))
Example 14
def find_call(audio, config):
    
    # TODO: Why does `detect_tseeps` return selections in seconds?
    # TODO: We're tied to tseeps here since we call `detect_tseeps`.
    # Perhaps we should call `detect_events` with an appropriate
    # detector configuration instead.
    selections = nfc_detection_utils.detect_tseeps(audio)
    selection = nfc_detection_utils.get_longest_selection(selections)
    
    if selection is None:
        return None
    
    else:
        start_time, end_time = selection
        sample_rate = float(audio.sample_rate)
        start_index = seconds_to_frames(start_time, sample_rate)
        end_index = seconds_to_frames(end_time, sample_rate)
        return (start_index, end_index)
Example 15
    def __init__(self, settings):

        self._settings = settings

        s = settings
        sample_rate = s.waveform_sample_rate

        # Get waveform trimming start and end indices.
        self._start_time_index = signal_utils.seconds_to_frames(
            s.waveform_start_time, sample_rate)
        waveform_length = signal_utils.seconds_to_frames(
            s.waveform_duration, sample_rate)
        self._end_time_index = self._start_time_index + waveform_length

        # Get spectrogram settings.
        window_size = signal_utils.seconds_to_frames(
            s.spectrogram_window_size, sample_rate)
        hop_size = signal_utils.seconds_to_frames(
            s.spectrogram_hop_size, sample_rate)
        dft_size = tfa_utils.get_dft_size(window_size)
        self._spectrogram_settings = Settings(
            window=data_windows.create_window('Hann', window_size),
            hop_size=hop_size,
            dft_size=dft_size,
            reference_power=1)

        # Get spectrogram shape.
        num_spectra = tfa_utils.get_num_analysis_records(
            waveform_length, window_size, hop_size)
        num_bins = dft_size // 2 + 1
        self._spectrogram_shape = (num_spectra, num_bins)
        self._augmented_spectrogram_shape = (1,) + self._spectrogram_shape

        # Get spectrogram trimming start and end indices.
        self._start_freq_index = _freq_to_dft_bin_num(
            settings.spectrogram_start_freq, sample_rate, dft_size)
        self._end_freq_index = _freq_to_dft_bin_num(
            settings.spectrogram_end_freq, sample_rate, dft_size) + 1
Example 16
    def __init__(self, settings):

        self._settings = settings

        s = settings
        sample_rate = s.waveform_sample_rate

        # Get waveform trimming start and end indices.
        self._start_time_index = signal_utils.seconds_to_frames(
            s.waveform_start_time, sample_rate)
        waveform_length = signal_utils.seconds_to_frames(
            s.waveform_duration, sample_rate)
        self._end_time_index = self._start_time_index + waveform_length

        # Get spectrogram settings.
        window_size = signal_utils.seconds_to_frames(s.spectrogram_window_size,
                                                     sample_rate)
        hop_size = signal_utils.seconds_to_frames(s.spectrogram_hop_size,
                                                  sample_rate)
        dft_size = tfa_utils.get_dft_size(window_size)
        self._spectrogram_settings = Settings(
            window=data_windows.create_window('Hann', window_size),
            hop_size=hop_size,
            dft_size=dft_size,
            reference_power=1)

        # Get spectrogram shape.
        num_spectra = tfa_utils.get_num_analysis_records(
            waveform_length, window_size, hop_size)
        num_bins = dft_size // 2 + 1
        self._spectrogram_shape = (num_spectra, num_bins)
        self._augmented_spectrogram_shape = (1, ) + self._spectrogram_shape

        # Get spectrogram trimming start and end indices.
        self._start_freq_index = _freq_to_dft_bin_num(
            settings.spectrogram_start_freq, sample_rate, dft_size)
        self._end_freq_index = _freq_to_dft_bin_num(
            settings.spectrogram_end_freq, sample_rate, dft_size) + 1
Example 17
    def __init__(
            self, file_path, sample_rate, score_scale_factor,
            score_repetition_factor, output_start_offset=0,
            output_duration=None):
        
        self._sample_rate = sample_rate
        self._score_scale_factor = score_scale_factor
        self._score_repetition_factor = score_repetition_factor

        self._output_start_index = signal_utils.seconds_to_frames(
            output_start_offset, sample_rate)
        
        if output_duration is None:
            self._output_end_index = None
        else:
            max_file_length = signal_utils.seconds_to_frames(
                output_duration, sample_rate)
            self._output_end_index = self._output_start_index + max_file_length
        
        # Open wave file.
        self._writer = wave.open(file_path, 'wb')
        self._writer.setparams((2, 2, sample_rate, 0, 'NONE', None))
        
        self._samples_start_index = 0
Example 18
    def _notify_listener_of_clips(self, peak_indices, peak_scores,
                                  input_length, threshold):

        # print('Clips:')

        start_offset = self._input_chunk_start_index + self._clip_start_offset
        peak_indices *= self._hop_size

        for i, score in zip(peak_indices, peak_scores):

            # Convert classification index to input index, accounting for
            # any difference between classification sample rate and input
            # rate.
            f = self._input_sample_rate / self._purported_input_sample_rate
            classification_sample_rate = f * self._classifier_sample_rate
            t = signal_utils.get_duration(i, classification_sample_rate)
            i = signal_utils.seconds_to_frames(t, self._input_sample_rate)

            clip_start_index = i + start_offset
            clip_end_index = clip_start_index + self._clip_length
            chunk_end_index = self._input_chunk_start_index + input_length

            if clip_start_index < 0:
                logging.warning(
                    'Rejected clip that started before beginning of '
                    'recording.')

            elif clip_end_index > chunk_end_index:
                # clip might extend past end of recording, since it extends
                # past the end of this chunk (we do not know whether or
                # not the current chunk is the last)

                logging.warning(
                    'Rejected clip that ended after end of recording chunk.')

            else:
                # all clip samples are in the recording interval extending
                # from the beginning of the recording to the end of the
                # current chunk

                # print(
                #     '    {} {}'.format(clip_start_index, self._clip_length))

                annotations = {'Detector Score': 100 * score}

                self._listener.process_clip(clip_start_index,
                                            self._clip_length, threshold,
                                            annotations)
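`signal_utils.get_duration` is evidently the inverse conversion of `seconds_to_frames`: it maps a frame count back to seconds, so the pair translates an index from one sample rate to another. A minimal sketch, under the same rounding assumption as the `seconds_to_frames` sketch above:

def get_duration(num_frames, sample_rate):
    # Hypothetical sketch: a frame count divided by the sample rate
    # gives a duration in seconds.
    return num_frames / sample_rate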
Example 19
    def _notify_listener_of_clips(
            self, peak_indices, peak_scores, input_length, threshold):

        # print('Clips:')

        start_offset = self._input_chunk_start_index + self._clip_start_offset
        peak_indices *= self._hop_size

        for i, score in zip(peak_indices, peak_scores):

            # Convert classification index to input index, accounting
            # for difference between classifier sample rate and input
            # sample rate.
            t = signal_utils.get_duration(i, self._classifier_sample_rate)
            i = signal_utils.seconds_to_frames(t, self._input_sample_rate)

            clip_start_index = i + start_offset
            clip_end_index = clip_start_index + self._clip_length
            chunk_end_index = self._input_chunk_start_index + input_length

            if clip_start_index < 0:
                logging.warning(
                    'Rejected clip that started before beginning of '
                    'recording.')

            elif clip_end_index > chunk_end_index:
                # clip might extend past end of recording, since it extends
                # past the end of this chunk (we do not know whether or
                # not the current chunk is the last)

                logging.warning(
                    'Rejected clip that ended after end of recording chunk.')

            else:
                # all clip samples are in the recording interval extending
                # from the beginning of the recording to the end of the
                # current chunk

                # print(
                #     '    {} {}'.format(clip_start_index, self._clip_length))

                annotations = {'Detector Score': 100 * score}

                self._listener.process_clip(
                    clip_start_index, self._clip_length, threshold,
                    annotations)
Example 20
    def __init__(self, mode, settings, output_feature_name='spectrogram'):
        
        # `mode` can be `DATASET_MODE_TRAINING`, `DATASET_MODE_EVALUATION`,
        # or `DATASET_MODE_INFERENCE`.
        #
        # When `mode` is `DATASET_MODE_TRAINING`, dataset examples are
        # preprocessed according to certain settings that control waveform
        # slicing and data augmentation.
        #
        # When `mode` is `DATASET_MODE_EVALUATION`, dataset examples are
        # processed as when it is `DATASET_MODE_TRAINING`, except that
        # data augmentation can be turned on or off via the
        # `evaluation_data_augmentation_enabled` setting.
        #
        # When `mode` is `DATASET_MODE_INFERENCE`, dataset waveforms are
        # not sliced as they are when it is `DATASET_MODE_TRAINING` or
        # `DATASET_MODE_EVALUATION`. Instead, the slicing start index is
        # always zero. Data augmentation is also disabled.

        self.settings = settings
        self.output_feature_name = output_feature_name
        
        s = settings
        
        (self.time_start_index, self.time_end_index,
         self.window_size, self.hop_size, self.dft_size,
         self.freq_start_index, self.freq_end_index) = \
            _get_low_level_preprocessing_settings(mode, s)
         
        self.waveform_length = self.time_end_index - self.time_start_index
                
        self.window_fn = functools.partial(
            tf.contrib.signal.hann_window, periodic=True)
        
        augmentation_enabled = _is_data_augmentation_enabled(mode, s)
            
        self.random_waveform_time_shifting_enabled = \
            augmentation_enabled and s.random_waveform_time_shifting_enabled
        
        if self.random_waveform_time_shifting_enabled:
            self.max_waveform_time_shift = signal_utils.seconds_to_frames(
                s.max_waveform_time_shift, s.waveform_sample_rate)
Example 21
    def __init__(self, mode, settings, output_feature_name='spectrogram'):

        # `mode` can be `DATASET_MODE_TRAINING`, `DATASET_MODE_EVALUATION`,
        # or `DATASET_MODE_INFERENCE`.
        #
        # When `mode` is `DATASET_MODE_TRAINING`, dataset examples are
        # preprocessed according to certain settings that control waveform
        # slicing and data augmentation.
        #
        # When `mode` is `DATASET_MODE_EVALUATION`, dataset examples are
        # processed as when it is `DATASET_MODE_TRAINING`, except that
        # data augmentation can be turned on or off via the
        # `evaluation_data_augmentation_enabled` setting.
        #
        # When `mode` is `DATASET_MODE_INFERENCE`, dataset waveforms are
        # not sliced as they are when it is `DATASET_MODE_TRAINING` or
        # `DATASET_MODE_EVALUATION`. Instead, the slicing start index is
        # always zero. Data augmentation is also disabled.

        self.settings = settings
        self.output_feature_name = output_feature_name

        s = settings

        (self.time_start_index, self.time_end_index,
         self.window_size, self.hop_size, self.dft_size,
         self.freq_start_index, self.freq_end_index) = \
            _get_low_level_preprocessing_settings(mode, s)

        self.waveform_length = self.time_end_index - self.time_start_index

        self.window_fn = functools.partial(tf.contrib.signal.hann_window,
                                           periodic=True)

        augmentation_enabled = _is_data_augmentation_enabled(mode, s)

        self.random_waveform_time_shifting_enabled = \
            augmentation_enabled and s.random_waveform_time_shifting_enabled

        if self.random_waveform_time_shifting_enabled:
            self.max_waveform_time_shift = signal_utils.seconds_to_frames(
                s.max_waveform_time_shift, s.waveform_sample_rate)
Example 22
def extract_clip_segment(clip,
                         segment_duration,
                         segment_source,
                         source_duration=None):

    source = _get_segment_source(clip, segment_source, source_duration)

    if source is None:
        return None

    else:

        source_start_index, source_length = source

        sample_rate = clip.sample_rate
        segment_length = signal_utils.seconds_to_frames(
            segment_duration, sample_rate)

        if source_length < segment_length:
            # source not long enough to extract segment from

            return None

        else:

            # Extract samples from source.
            if source_length == segment_length:
                offset = 0
            else:
                offset = random.randrange(source_length - segment_length)
            start_index = source_start_index + offset
            end_index = start_index + segment_length
            samples = clip_manager.instance.get_samples(clip)
            samples = samples[start_index:end_index]

            return Bunch(samples=samples,
                         sample_rate=clip.sample_rate,
                         start_index=start_index)
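Hypothetical usage, extracting a random half-second segment drawn from the middle two seconds of a clip (the `clip` object is assumed to provide `sample_rate`, `length`, and `selection`):

segment = extract_clip_segment(
    clip, segment_duration=.5,
    segment_source=SEGMENT_SOURCE_CLIP_CENTER, source_duration=2)

if segment is not None:
    # segment.samples holds the extracted samples, starting at
    # segment.start_index within the clip.
    print(segment.start_index, len(segment.samples))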
Example 23
def extract_clip_segment(
        clip, segment_duration, segment_source, source_duration=None):
    
    source = _get_segment_source(clip, segment_source, source_duration)
    
    if source is None:
        return None
    
    else:
        
        source_start_index, source_length = source
        
        sample_rate = clip.sample_rate
        segment_length = signal_utils.seconds_to_frames(
            segment_duration, sample_rate)
        
        if source_length < segment_length:
            # source not long enough to extract segment from
            
            return None
            
        else:
            
            # Extract samples from source.
            if source_length == segment_length:
                offset = 0
            else:
                offset = random.randrange(source_length - segment_length)
            start_index = source_start_index + offset
            end_index = start_index + segment_length
            samples = clip_manager.instance.get_samples(clip)
            samples = samples[start_index:end_index]
            
            return Bunch(
                samples=samples,
                sample_rate=clip.sample_rate,
                start_index=start_index)
Example 24
    def __init__(self, mode, settings, output_feature_name='spectrogram'):

        # `mode` can be `DATASET_MODE_TRAINING`, `DATASET_MODE_EVALUATION`,
        # or `DATASET_MODE_INFERENCE`.
        #
        # When `mode` is `DATASET_MODE_TRAINING` or
        # `DATASET_MODE_EVALUATION`, dataset examples are preprocessed
        # according to certain settings that control waveform modification
        # and slicing.
        #
        # When `mode` is `DATASET_MODE_INFERENCE`, waveform modification
        # is disabled and the slicing start index is always zero.

        self.settings = settings
        self.output_feature_name = output_feature_name

        s = settings

        (self.time_start_index, self.time_end_index,
         self.window_size, self.hop_size, self.dft_size,
         self.freq_start_index, self.freq_end_index) = \
            _get_low_level_preprocessing_settings(mode, s)

        self.waveform_length = self.time_end_index - self.time_start_index

        self.window_fn = functools.partial(tf.contrib.signal.hann_window,
                                           periodic=True)

        # Note that we perform random waveform time shifting in the
        # evaluation dataset mode for a classifier that will be deployed
        # in a detector. The distribution of event onset times within
        # clips created by the Old Bird detectors (the current source of
        # our training data) is less uniform than the more or less flat
        # distribution that a classifier sees in the recording segments
        # presented to it when it is deployed in a detector. Random waveform
        # time shifting flattens and widens the onset time distribution in
        # the dataset, making it more like what it will see in deployment.
        self.random_waveform_time_shifting_enabled = \
            s.random_waveform_time_shifting_enabled and (
                mode == DATASET_MODE_TRAINING or (
                    mode == DATASET_MODE_EVALUATION and
                    s.target_use == TARGET_USE_DETECTOR))

        if self.random_waveform_time_shifting_enabled:
            self.max_waveform_time_shift = signal_utils.seconds_to_frames(
                s.max_waveform_time_shift, s.waveform_sample_rate)

        # We perform random waveform amplitude scaling during training
        # in order to make the distribution of input amplitudes wider
        # and more uniform, with the intent of making the classifier
        # less sensitive to variation in input amplitude. We perform
        # the same scaling during evaluation in order to gauge the
        # classifier's performance on a similar input amplitude
        # distribution. If in the future we perform some sort of
        # amplitude normalization (e.g. PCEN or normalization based
        # on order statistical background noise power estimates), the
        # random scaling may no longer be needed.
        self.random_waveform_amplitude_scaling_enabled = \
            s.random_waveform_amplitude_scaling_enabled and (
                mode == DATASET_MODE_TRAINING or
                mode == DATASET_MODE_EVALUATION)
Example 25
def create_output_files(night_file_infos):

    max_length = int(
        signal_utils.seconds_to_frames(MAX_NIGHT_DURATION * 3600,
                                       INPUT_SAMPLE_RATE))
    input_samples = np.empty(max_length, dtype='int16')

    night_intervals = sorted(night_file_infos.keys())

    for night_interval in night_intervals:

        partitions = night_file_infos[night_interval]

        for file_infos in partitions:

            start_time = time.time()
            partition_input_length = 0

            first_input_start = file_infos[0][1].start

            # Get output file start time.
            if night_interval.start >= first_input_start:
                output_start_time = night_interval.start
            else:
                output_start_time = first_input_start

            output_file_name = create_output_file_name(output_start_time)
            print('Creating recording {}...'.format(output_file_name))

            partition_read_interval = get_partition_read_interval(
                night_interval, first_input_start)

            input_start_index = 0

            for i, (input_file_path, _, input_length) in enumerate(file_infos):

                input_end_index = input_start_index + input_length
                input_interval = Interval(input_start_index, input_end_index)

                # Get read interval as partition indices.
                read_interval = intersect_intervals(input_interval,
                                                    partition_read_interval)

                # Get read interval as input file indices.
                read_interval = Interval(
                    read_interval.start - input_start_index,
                    read_interval.end - input_start_index)

                read_size = read_interval.end - read_interval.start

                with soundfile.SoundFile(str(input_file_path)) as sound_file:

                    if read_interval.start != 0:
                        sound_file.seek(read_interval.start)

                    samples = sound_file.read(read_size, dtype='int16')

                    start = partition_input_length
                    end = partition_input_length + read_size
                    input_samples[start:end] = samples

                print('    Reading {} {} {} {} {} {}...'.format(
                    i, input_file_path.name, input_length, read_interval.start,
                    read_interval.end, read_size))

                partition_input_length += read_size

                input_start_index += input_length

            duration = partition_input_length / INPUT_SAMPLE_RATE / 3600
            print('    Resampling {:.1f} hours of audio...'.format(duration))
            output_samples = resampling_utils.resample_to_24000_hz(
                input_samples[:partition_input_length], INPUT_SAMPLE_RATE)

            output_samples.shape = (1, len(output_samples))
            output_file_path = OUTPUT_DIR_PATH / output_file_name
            audio_file_utils.write_wave_file(str(output_file_path),
                                             output_samples,
                                             OUTPUT_SAMPLE_RATE)

            end_time = time.time()
            elapsed_time = end_time - start_time
            partition_duration = partition_input_length / INPUT_SAMPLE_RATE
            rate = partition_duration / elapsed_time
            print(
                ('    Processed {:.1f} seconds of audio in {:.1f} seconds, or '
                 '{:.1f} times faster than real time.').format(
                     partition_duration, elapsed_time, rate))
Example 26
def create_silence(duration, sample_rate):
    length = signal_utils.seconds_to_frames(duration, sample_rate)
    samples = np.zeros((1, length))
    return Bunch(samples=samples, sample_rate=sample_rate)
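Under the rounding assumption above, one second of silence at 24000 Hz yields one channel of 24000 zero-valued frames:

# Hypothetical usage.
silence = create_silence(1, 24000)
assert silence.samples.shape == (1, 24000)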
Example 27
def _s2f(seconds, sample_rate):
    frames = signal_utils.seconds_to_frames(seconds, sample_rate)
    return tf.cast(frames, tf.int64)
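The cast matters because TensorFlow slicing and padding ops typically expect integer tensors; under the rounding assumption above, `_s2f` wraps the Python integer in an `int64` tensor.

# Hypothetical usage: a half-second hop at 24 kHz as an int64 tensor.
hop = _s2f(.5, 24000)   # tf.Tensor with dtype int64 and value 12000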