Code Example #1
    def train_on_audio(self, fn: str):
        """Run through a single audio file"""
        save_test = random() > 0.8
        audio = load_audio(fn)
        num_chunks = len(audio) // self.args.chunk_size

        self.listener.clear()

        for i, chunk in enumerate(chunk_audio(audio, self.args.chunk_size)):
            print('\r' + str(i * 100. / num_chunks) + '%', end='', flush=True)
            # Slide the rolling audio buffer forward by one chunk
            self.audio_buffer = np.concatenate((self.audio_buffer[len(chunk):], chunk))
            conf = self.listener.update(chunk)
            if conf > self.args.threshold:
                self.samples_since_train += 1
                name = splitext(basename(fn))[0] + '-' + str(i) + '.wav'
                name = join(self.args.folder, 'test' if save_test else '', 'not-wake-word',
                            'generated', name)
                save_audio(name, self.audio_buffer)
                print()
                print('Saved to:', name)

            # Retrain periodically once enough false activations have accumulated
            if not save_test and self.samples_since_train >= self.args.delay_samples and \
                    self.args.epochs > 0:
                self.samples_since_train = 0
                self.retrain()
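
The loop above depends on chunk_audio to split the waveform into fixed-size pieces. A minimal sketch of such a generator, assuming simple non-overlapping chunks (an illustration, not necessarily the project's implementation):

import numpy as np

def chunk_audio_sketch(audio: np.ndarray, chunk_size: int):
    """Yield consecutive, non-overlapping chunks of chunk_size samples."""
    for start in range(0, len(audio) - chunk_size + 1, chunk_size):
        yield audio[start:start + chunk_size]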
Code Example #2
File: add_noise.py  Project: jarkovic/veracrypt-voice
def main():
    args = create_parser(usage).parse_args()
    args.tags_file = abspath(args.tags_file) if args.tags_file else None
    args.folder = abspath(args.folder)
    args.output_folder = abspath(args.output_folder)
    noise_min, noise_max = args.noise_ratio_low, args.noise_ratio_high

    data = TrainData.from_both(args.tags_file, args.folder, args.folder)
    noise_data = NoiseData(args.noise_folder)
    print('Data:', data)

    def translate_filename(source: str, n=0) -> str:
        assert source.startswith(args.folder)
        relative_file = source[len(args.folder):].strip(os.path.sep)
        if n > 0:
            base, ext = splitext(relative_file)
            relative_file = base + '.' + str(n) + ext
        return join(args.output_folder, relative_file)

    all_filenames = sum(data.train_files + data.test_files, [])
    for i, filename in enumerate(all_filenames):
        print('{0:.2%}  \r'.format(i / (len(all_filenames) - 1)), end='', flush=True)

        audio = load_audio(filename)
        for n in range(args.inflation_factor):
            altered = noise_data.noised_audio(audio, noise_min + (noise_max - noise_min) * random())
            output_filename = translate_filename(filename, n)

            makedirs(dirname(output_filename), exist_ok=True)
            save_audio(output_filename, altered)

    print('Done!')

    if args.tags_file and args.tags_file.startswith(args.folder):
        shutil.copy2(args.tags_file, translate_filename(args.tags_file))
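
For reference, translate_filename maps a source path under args.folder into args.output_folder and, for inflated copies, inserts the index before the extension. A hypothetical call with made-up paths:

# Assuming args.folder = '/data/keywords' and args.output_folder = '/data/noised':
# translate_filename('/data/keywords/wake-word/alice.wav')    -> '/data/noised/wake-word/alice.wav'
# translate_filename('/data/keywords/wake-word/alice.wav', 2) -> '/data/noised/wake-word/alice.2.wav'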
Code Example #3
def load_vector(name: str, vectorizer: Callable = None) -> np.ndarray:
    """Loads and caches a vector input from a wav or npy file"""
    import os

    # Fall back to the delta vectorizer when the params request delta features
    vectorizer = vectorizer or (vectorize_delta if pr.use_delta else vectorize)

    # Cache under .cache/<md5 of the current params>/<vectorizer>.<name>.npy
    save_name = name if name.endswith('.npy') else os.path.join(
        '.cache', hashlib.md5(
            str(sorted(pr.__dict__.values())).encode()
        ).hexdigest(), vectorizer.__name__ + '.' + name + '.npy')

    if os.path.isfile(save_name):
        return np.load(save_name)

    print('Loading ' + name + '...')
    os.makedirs(os.path.dirname(save_name), exist_ok=True)

    vec = vectorizer(load_audio(name))
    np.save(save_name, vec)
    return vec
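
A brief usage note: the function acts as a read-through cache keyed on the vectorizer name and the current parameter values, so repeated calls for the same file skip feature extraction. Hypothetical calls with a made-up filename:

feats = load_vector('hey-computer.1.wav')   # first call: vectorizes the wav and writes the .npy cache
feats = load_vector('hey-computer.1.wav')   # second call: loads the cached array directly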
Code Example #4
    def play_audio(audio_file):
        nonlocal stream
        if stream:
            # Clearing the event asks the running writer thread to stop;
            # it sets the event back to acknowledge before we tear the stream down
            stop_event.clear()
            stop_event.wait()
            stream.stop_stream()
            stream.close()
            stream = None
        # Keep only the newest buffer_samples and roughly normalize the playback volume
        audio = load_audio(audio_file)[-pr.buffer_samples:]
        audio /= 2 * min(audio.mean() + 4 * audio.std(), abs(audio).max())
        stream = p.open(format=paFloat32,
                        channels=1,
                        rate=pr.sample_rate,
                        output=True)
        stream.start_stream()

        def write_audio():
            # tobytes() replaces the deprecated ndarray.tostring(); the raw bytes are identical
            data = audio.astype('float32').tobytes()
            chunk_size = 1024
            for pos in range(chunk_size, len(data) + chunk_size, chunk_size):
                if not stop_event.is_set():
                    # A cleared event means a stop was requested: acknowledge and bail out
                    stop_event.set()
                    return
                stream.write(data[pos - chunk_size:pos])
            # All data written: idle until the next stop request, then acknowledge it
            while stop_event.is_set():
                sleep(chunk_size / pr.sample_rate)
            stop_event.set()

        Thread(target=write_audio, daemon=True).start()
Code Example #5
    def __init__(self, noise_folder: str):
        self.noise_data = [
            load_audio(file) for file in glob(join(noise_folder, '*.wav'))
        ]
        self.noise_data_id = 0
        self.noise_pos = 0
        self.repeat_count = 0
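
Code Example #2 calls noise_data.noised_audio(audio, ratio) on this object; that method is not shown on this page. A minimal sketch of what such a mixer could do, assuming the noise clips are float arrays at the same sample rate (an illustration only, with a hypothetical name):

import numpy as np
from random import randrange

def noised_audio_sketch(audio: np.ndarray, noise: np.ndarray, noise_ratio: float) -> np.ndarray:
    """Blend a random slice of noise into audio at the given ratio (assumed behaviour)."""
    if len(noise) < len(audio):
        # Tile the noise clip so it covers the whole sample
        noise = np.tile(noise, int(np.ceil(len(audio) / len(noise))))
    start = randrange(len(noise) - len(audio) + 1)
    noise_slice = noise[start:start + len(audio)]
    return (1.0 - noise_ratio) * audio + noise_ratio * noise_slice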
Code Example #6
    def run(self):
        total = Metric(chunk_size=self.args.chunk_size)
        for wav_file in glob(join(self.args.folder, '*.wav')):
            audio = load_audio(wav_file)
            if audio.size == 0:
                continue

            predictions = self.evaluate(audio)
            detector = TriggerDetector(self.args.chunk_size,
                                       trigger_level=0,
                                       sensitivity=self.args.threshold)

            metric = Metric(
                chunk_size=self.args.chunk_size,
                seconds=len(audio) / pr.sample_rate,
                activated_chunks=(predictions > detector.sensitivity).sum(),
                activations=sum(detector.update(p) for p in predictions),
                activation_sum=predictions.sum())
            total.add(metric)
            print()
            print(metric.info_string(basename(wav_file)))
            del audio
        print()
        print()
        print(total.info_string('Total'))
Code Example #7
    def generate_wakeword_pieces(self, volume):
        """Generates chunks of audio that represent the wakeword stream"""
        while True:
            target = 1 if random() > 0.5 else 0
            it = self.pos_files_it if target else self.neg_files_it
            sample_file = next(it)
            yield self.layer_with(
                self.normalize_volume_to(load_audio(sample_file), volume),
                target)
            yield self.layer_with(
                np.zeros(int(pr.sample_rate * (0.5 + 2.0 * random()))), 0)
Code Example #8
def load_vector(name: str, vectorizer: Callable = None) -> np.ndarray:
    """Loads and caches a vector input from a wav or npy file"""
    vectorizer = vectorizer or (vectorize_delta if pr.use_delta else vectorize)

    save_name = name if name.endswith('.npy') else get_cache_file(name)

    if os.path.isfile(save_name):
        return np.load(save_name)

    os.makedirs(os.path.dirname(save_name), exist_ok=True)

    vec = vectorizer(load_audio(name))
    np.save(save_name, vec)
    return vec
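
This variant delegates the cache-path construction to get_cache_file, which is not shown here. Based on the inline logic of Code Example #3, a plausible sketch of that helper (an assumption, not the verified implementation):

import hashlib
import os

def get_cache_file_sketch(name: str, vectorizer=None) -> str:
    """Build a cache path from the params hash and vectorizer name, mirroring Example #3."""
    vectorizer = vectorizer or (vectorize_delta if pr.use_delta else vectorize)
    params_hash = hashlib.md5(str(sorted(pr.__dict__.values())).encode()).hexdigest()
    return os.path.join('.cache', params_hash, vectorizer.__name__ + '.' + name + '.npy')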
Code Example #9
    def __load_files(kw_files: list,
                     nkw_files: list,
                     vectorizer: Callable = None,
                     shuffle=True) -> tuple:
        from precise.params import pr

        input_parts = []
        output_parts = []

        vectorizer = vectorizer or (vectorize_delta
                                    if pr.use_delta else vectorize)
        cache = Pyache('.cache', lambda x: vectorizer(load_audio(x)),
                       pr.vectorization_md5_hash())

        def add(filenames, output):
            def on_loop():
                on_loop.i += 1
                print('\r{0:.2%}  '.format(on_loop.i / len(filenames)),
                      end='',
                      flush=True)

            on_loop.i = 0

            new_inputs = cache.load(filenames, on_loop=on_loop)
            new_outputs = np.array([[output] for _ in range(len(new_inputs))])
            if new_inputs.size == 0:
                new_inputs = np.empty((0, pr.n_features, pr.feature_size))
            if new_outputs.size == 0:
                new_outputs = np.empty((0, 1))
            input_parts.append(new_inputs)
            output_parts.append(new_outputs)
            print('\r       \r', end='', flush=True)

        print('Loading wake-word...')
        add(kw_files, 1.0)

        print('Loading not-wake-word...')
        add(nkw_files, 0.0)

        inputs = np.concatenate(input_parts) if input_parts else np.empty(
            (0, pr.n_features, pr.feature_size))
        outputs = np.concatenate(output_parts) if output_parts else np.empty(
            (0, 1))

        shuffle_ids = np.arange(len(inputs))
        if shuffle:
            np.random.shuffle(shuffle_ids)
        return inputs[shuffle_ids], outputs[shuffle_ids]
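
The final step shuffles inputs and outputs with a single index array so each feature matrix stays paired with its label. A tiny standalone illustration of that pattern:

import numpy as np

inputs = np.array([[1], [2], [3], [4]])
outputs = np.array([[1.0], [0.0], [1.0], [0.0]])

ids = np.arange(len(inputs))
np.random.shuffle(ids)                        # permute the indices once
inputs, outputs = inputs[ids], outputs[ids]   # apply the same permutation to both arrays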
Code Example #10
def load_vector(name: str, vectorizer: Callable = vectorize) -> np.ndarray:
    """Loads and caches a vector input from a wav or npy file"""
    import os

    save_name = name if name.endswith('.npy') else os.path.join(
        '.cache', str(abs(hash(pr))), vectorizer.__name__ + '.' + name +
        '.npy')

    if os.path.isfile(save_name):
        return np.load(save_name)

    print('Loading ' + name + '...')
    os.makedirs(os.path.dirname(save_name), exist_ok=True)

    vec = vectorizer(load_audio(name))
    np.save(save_name, vec)
    return vec
Code Example #11
    def vectors_from_fn(self, fn: str):
        """
        Run through a single background audio file, overlaying with wake words.
        Generates (mfccs, target) where mfccs is a series of mfcc values and
        target is a single integer classification of the target network output for that chunk
        """
        audio = load_audio(fn)
        audio_volume = self.calc_volume(audio)
        audio_volume *= 0.4 + 0.5 * random()
        audio = self.normalize_volume_to(audio, audio_volume)

        self.listener.clear()
        chunked_bg = chunk_audio(audio, self.args.chunk_size)
        chunked_ww = self.chunk_audio_pieces(
            self.generate_wakeword_pieces(audio_volume), self.args.chunk_size)

        for i, (chunk_bg, (chunk_ww,
                           targets)) in enumerate(zip(chunked_bg, chunked_ww)):
            chunk = self.merge(chunk_bg, chunk_ww, 0.6)
            self.vals_buffer = np.concatenate(
                (self.vals_buffer[len(targets):], targets))
            self.audio_buffer = np.concatenate(
                (self.audio_buffer[len(chunk):], chunk))
            mfccs = self.listener.update_vectors(chunk)
            percent_overlapping = self.max_run_length(
                self.vals_buffer, 1) / len(self.vals_buffer)

            # A buffer dominated by a wake word that has just ended counts as a positive
            # sample; a mostly-background buffer counts as a negative; anything in
            # between is ambiguous and skipped
            if self.vals_buffer[-1] == 0 and percent_overlapping > 0.8:
                target = 1
            elif percent_overlapping < 0.5:
                target = 0
            else:
                continue

            # Occasionally dump the merged chunk to disk for debugging (probability save_prob)
            if random() > 1.0 - self.args.save_prob:
                name = splitext(basename(fn))[0]
                wav_file = join('debug', 'ww' if target == 1 else 'nww',
                                '{} - {}.wav'.format(name, i))
                save_audio(wav_file, self.audio_buffer)
            yield mfccs, target
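
The labelling above relies on self.max_run_length(self.vals_buffer, 1), presumably the length of the longest consecutive run of 1s in the label buffer. A minimal sketch of such a helper under that assumption (hypothetical name):

import numpy as np

def max_run_length_sketch(values: np.ndarray, target) -> int:
    """Length of the longest consecutive run of target in values (assumed semantics)."""
    best = current = 0
    for value in values:
        current = current + 1 if value == target else 0
        best = max(best, current)
    return best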
Code Example #12
def play_wav(filename, p: PyAudio):
    audio = load_audio(filename)
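
The listing is cut off at this point. Purely as an illustration of how such a helper could finish (an assumption, not the project's actual code), using only the PyAudio calls already seen in Code Example #4:

def play_wav_sketch(filename, p: PyAudio):
    """Hypothetical sketch: stream a loaded wav through PyAudio as float32 samples."""
    audio = load_audio(filename)
    stream = p.open(format=paFloat32, channels=1, rate=pr.sample_rate, output=True)
    stream.write(audio.astype('float32').tobytes())
    stream.stop_stream()
    stream.close()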