コード例 #1
0
# Convert the FDA pitch-evaluation corpus from its original raw-file layout
# into a jbof dataset.
# NOTE(review): this snippet calls jbof.create_dataset below but never
# imports jbof — presumably an `import jbof` line was lost; confirm against
# the original file.
import pathlib
import soundfile
import numpy

import shutil
# Start from a clean slate: remove any previously-built dataset directory.
shutil.rmtree('FDA', ignore_errors=True)

root = pathlib.Path('FDA_orig')
README = (root / 'README').read_text()
# orthographic.index holds "<number> <sentence>" per line; build {int: sentence}.
sentences = {
    int(k): v
    for k, v in
    [l.strip().split(' ', 1) for l in (root / 'orthographic.index').open()]
}

dataset = jbof.create_dataset('FDA', {'README': README})

import itertools
# Two speaker directories, `rl` and `sb`; each .sig utterance is headerless
# raw big-endian 16-bit PCM at 20 kHz, hence the explicit RAW parameters.
for file in itertools.chain(root.glob('rl/*.sig'), root.glob('sb/*.sig')):
    speech, speech_samplerate = soundfile.read(str(file),
                                               samplerate=20_000,
                                               channels=1,
                                               format='RAW',
                                               subtype='PCM_16',
                                               endian='BIG')
    # The matching .lar file holds the laryngograph channel in the same
    # raw format.
    laryngograph, laryngograph_samplerate = soundfile.read(str(
        file.with_suffix('.lar')),
                                                           samplerate=20_000,
                                                           channels=1,
                                                           format='RAW',
                                                           subtype='PCM_16',
    # NOTE(review): snippet is truncated here — the closing arguments of
    # this soundfile.read call (and the rest of the loop) are missing.
コード例 #2
0
# Convert the QUT-NOISE corpus into a jbof dataset, attaching per-recording
# label (and, below the truncation, impulse) annotations as arrays.
import jbof
import pathlib
import numpy

import shutil
# Start from a clean slate: remove any previously-built dataset directory.
shutil.rmtree('QUT_NOISE', ignore_errors=True)

root = pathlib.Path('QUT-NOISE_original')

dataset = jbof.create_dataset(
    'QUT_NOISE', {
        'README': (root / 'docs' / 'README.text').read_text(),
        'LICENSE': (root / 'QUT-NOISE' / 'LICENSE.txt').read_text()
    })

for noisefile in root.glob('QUT-NOISE/*.wav'):
    item = dataset.add_item(noisefile.stem, {})
    item.add_array_from_file('signal', noisefile, {})

    # Label files hold "start stop label" per line.
    labelfile = (root / 'QUT-NOISE' / 'labels' / (noisefile.stem + '.lab.txt'))
    labels = []
    for line in labelfile.open():
        # NOTE(review): maxsplit=2 leaves any trailing newline attached to
        # `label`; confirm whether labels are meant to be stripped.
        start, stop, label = line.split(maxsplit=2)
        labels.append((float(start), float(stop), label))
    # Store as a structured array: (start, stop, label up to 32 chars).
    labels = numpy.array(labels,
                         dtype=[('start', float), ('stop', float),
                                ('label', 'U32')])
    item.add_array('labels', labels, {})

    impulsefile = (root / 'QUT-NOISE' / 'impulses' /
                   (noisefile.stem + '.imp.txt'))
    # NOTE(review): snippet is truncated here — the processing of
    # `impulsefile` is missing.
コード例 #3
0
        # NOTE(review): this snippet starts mid-file — `defer`, `algos`,
        # `tasklist`, `item`, and `datasetname` are defined above the
        # visible region.
        # Schedule one pitch-extraction task per (item, algorithm) pair;
        # the signal is wrapped in defer() so it is resolved lazily in the
        # worker process.
        signal = defer(item).signal
        for algo in algos:
            task = defer(algo, signal, signal.metadata['samplerate'])
            tasklist.schedule(task,
                              metadata=dict(item=item,
                                            dataset=datasetname,
                                            algo=algo))

# Run all scheduled tasks on 16 worker processes; autokill reaps workers
# that exceed 600 seconds.
for task in tqdm(tasklist.run(nprocesses=16, autokill=600),
                 smoothing=0,
                 desc='processing'):
    pass  # works for all items and algos except YIN and TIMIT for SX136GCS0 and SI572DMT0

# collect all pitches into a new dataset:

dataset = jbof.create_dataset('ground truth data')
for task in tqdm(tasklist.done_tasks(), desc='collecting pitches'):
    source_item = task.metadata['item']
    # Debug aid: a plain-string item indicates the metadata round-trip lost
    # the original item object.
    if isinstance(source_item, str):
        print(task._id, type(task._id), source_item,
              task.metadata['algo'].__name__)
    metadata = source_item.metadata
    metadata['speech_dataset'] = task.metadata['dataset']
    metadata['noise_dataset'] = None
    metadata['algo'] = task.metadata['algo'].__name__
    metadata['speech'] = source_item.name
    # Item names are unique per (dataset, algorithm, utterance).
    itemname = f'{metadata["speech_dataset"]}_{metadata["algo"]}_{metadata["speech"]}'
    # Re-running this collection pass overwrites previously stored results.
    if dataset.has_item(itemname):
        dataset.delete_item(itemname)
    item = dataset.add_item(name=itemname, metadata=metadata)
    results = task.returnvalue
    # NOTE(review): snippet is truncated here — the handling of `results`
    # is missing.
コード例 #4
0
# Convert the MOCHA-TIMIT corpus (wav + laryngograph + label files) into a
# jbof dataset.
import jbof
import pathlib
import soundfile
import numpy
from tqdm import tqdm

import shutil
# Start from a clean slate: remove any previously-built dataset directory.
shutil.rmtree('MOCHA-TIMIT_dataset', ignore_errors=True)

root = pathlib.Path('MOCHA-TIMIT_original')

README = (root / 'README_v1.2.txt').read_text()
LICENSE = (root / 'LICENCE.txt').read_text()
# mocha-timit.txt lines look like "<number>. <sentence>"; build {int: sentence},
# skipping blank lines.
sentences = {int(k.strip('.')): v for k, v in [l.strip().split(' ', 1) for l in (root / 'mocha-timit.txt').open() if l.strip()]}

dataset = jbof.create_dataset('MOCHA-TIMIT_dataset', {'README': README,
                                                      'LICENSE': LICENSE})

import itertools
# The list() materialization lets tqdm show a total; smoothing=0 gives an
# overall (not instantaneous) rate estimate.
for file in tqdm(list(itertools.chain(root.glob('*/*.wav'),
                                      root.glob('unchecked/*/*.wav'))),
                 smoothing=0):
    # wav: the speech signal.
    speech, speech_samplerate = soundfile.read(str(file))
    # lar: the laryngograph channel recorded alongside the speech.
    laryngograph, laryngograph_samplerate = soundfile.read(str(file.with_suffix('.lar')))
    # lab: optional "start stop label" annotations.
    labels = []
    if file.with_suffix('.lab').exists():
        with file.with_suffix('.lab').open() as f:
            for line in f:
                start, stop, label = line.split()
                # NOTE(review): snippet is truncated here — the append of
                # (start, stop, label) and the rest of the loop are missing.
コード例 #5
0
# Convert the CMU Arctic speech corpora from their original directory layout
# into a jbof dataset, attaching the prompt transcription to each utterance.
import jbof
import pathlib
import soundfile
import numpy

import shutil
# Start from a clean slate: remove any previously-built dataset directory.
shutil.rmtree('CMU_Arctic', ignore_errors=True)

root = pathlib.Path('CMU_Arctic_orig')
README = (root / 'CMU_Arctic_Databases.html').read_text()

dataset = jbof.create_dataset('CMU_Arctic', {'README': README})

# Each speaker lives in a directory named e.g. `cmu_us_bdl_arctic`; the
# three-letter speaker acronym sits at characters 7:10 of that name.
for directory in root.iterdir():
    if directory.is_file():
        continue
    acronym = directory.name[7:10]
    # etc/txt.done.data lines look like: ( arctic_a0001 "The sentence." )
    prompts = {}
    for line in (directory / 'etc' / 'txt.done.data').open('rt'):
        key, sentence = line.strip('( )').split(maxsplit=1)
        prompts[key] = sentence.strip('"')
    for wave in directory.glob('wav/*.wav'):
        name = wave.stem
        if name not in prompts:
            # Flag recordings without a matching prompt; they are still
            # added below, with transcription None.
            print(acronym, name)
        item = dataset.add_item(
            f'{acronym}_{name}',
            metadata={'transcription': prompts.get(name)})
        item.add_array_from_file('signal', wave)
コード例 #6
0
# Convert the PTDB-TUG pitch-tracking corpus into a jbof dataset, parsing
# the TIMIT prompts and speaker profiles from the shipped text files.
import jbof
import pathlib
from collections import defaultdict
import re
import numpy

import shutil
# Start from a clean slate: remove any previously-built dataset directory.
shutil.rmtree('PTDB_TUG', ignore_errors=True)

root = pathlib.Path('PTDB_TUG_orig')

dataset = jbof.create_dataset('PTDB_TUG', {
    'Recording Protocol': (root / 'RECORDING-PROTOCOL.txt').read_text(),
    'Speaker Profiles': (root / 'SPEAKER-PROFILES.txt').read_text(),
    'TIMIT Prompts': (root / 'TIMIT-PROMPTS.txt').read_text()})

# Each prompt line is "<sentence> (<label>)"; lines starting with ';' are
# comments.
# NOTE(review): this pattern should be a raw string (r'...') — '\(' inside
# a plain string literal is a deprecated invalid escape on newer Pythons.
sentences = {}
pattern = re.compile('([^.?!]+[.?!]).*\(([a-z0-9]+)\)')
for line in dataset.metadata['TIMIT Prompts'].split('\n'):
    if line.startswith(';') or not line:
        continue
    line, label = pattern.match(line).groups()
    sentences[label] = line

# Speaker-profile table: id, age, gender, origin, sentence ranges, remarks.
# NOTE(review): the raw-string remark above applies to this pattern too.
speaker_profiles = defaultdict(dict)
pattern = re.compile('([MF][0-9]{2})\s+([0-9]{2})\s+(Male|Female)\s+'
                     '(Ireland|USA|Canada|England|South Africa)\s+'
                     '(sa1,2 sx[0-9]+-[0-9]+\s+si[0-9]+-[0-9]+)\s*'
                     '(.*)')
for line in dataset.metadata['Speaker Profiles'].split('\n'):
    # Skip the header row, separator lines, and blank lines.
    if line.startswith('Speaker') or line.startswith('-') or not line:
        # NOTE(review): snippet is truncated here — the loop body continues
        # below the visible region.
コード例 #7
0
# Convert the KEELE pitch database into a jbof dataset.  Each recording has
# a .pes signal file and a .pet sidecar with key:value metadata headers and
# a time-aligned transcription.
import jbof
import pathlib
import soundfile
import numpy

import shutil
# Start from a clean slate: remove any previously-built dataset directory.
shutil.rmtree('KEELE', ignore_errors=True)

root = pathlib.Path('KEELE_orig')
README = (root / 'keele_pitch_database.htm').read_text()

dataset = jbof.create_dataset('KEELE', {
    'README': README})

for file in root.glob('*.pes'):
    # pet: text transcription
    metadata = {}
    transcription = []
    with file.with_suffix('.pet').open() as f:
        for line in f:
            if line == '\n':
                pass  # blank separator line — ignore
            elif not line.startswith('LBO'):
                # Header line of the form "KEY: value".
                key, value = line.split(':', 1)
                metadata[key] = value.strip()
                # SAM carries the sample rate; mirror it under a clearer key.
                if key == 'SAM':
                    metadata['samplerate'] = value.strip()
            else:
                # Transcription line: "LBO: <start>,<?>,<stop>,<text>".
                _, line = line.split(':', 1) # discard 'LBO'
                start, _, stop, text = line.split(',', 3)
                transcription.append({'begin': int(start),
                # NOTE(review): snippet is truncated here — the rest of this
                # dict literal and the loop are missing.
コード例 #8
0
# Convert the TIMIT corpus into a jbof dataset, attaching the prompt
# sentences and per-speaker dialect metadata.
import jbof
import pathlib
import soundfile
import numpy
import re

import shutil
# Start from a clean slate: remove any previously-built dataset directory.
shutil.rmtree('TIMIT_dataset', ignore_errors=True)

root = pathlib.Path('TIMIT_original')
README = (root / 'README.DOC').read_text()

dataset = jbof.create_dataset('TIMIT_dataset', {'README': README})

# PROMPTS.TXT lines look like "<sentence> (<id>)"; ';' starts a comment.
sentences = {}
for line in (root / 'TIMIT/DOC/PROMPTS.TXT').open():
    if line.startswith(';'): continue
    sentence, identifier = line.rsplit('(', maxsplit=1)
    identifier = identifier.strip(' ()\n')
    sentence = sentence.strip()
    sentences[identifier] = sentence

speakers = {}
# TIMIT dialect-region codes, mapped to their region names.
dialects = {
    1: 'New England',
    2: 'Northern',
    3: 'North Midland',
    4: 'South Midland',
    5: 'Southern',
    6: 'New York City',
    7: 'Western',
    # NOTE(review): snippet is truncated here — the remaining entries and
    # the closing brace are missing.
コード例 #9
0
    # NOTE(review): this snippet starts mid-file — the lines below are the
    # tail of a `labels` mapping (item name -> hand-picked boundary sample
    # indices) whose opening, and the `import jbof`, sit above the visible
    # region.
    ],
    m4nw0000=[
        63975, 124940, 221279, 268320, 326651, 352241, 417722, 474547, 520835,
        569381, 627712
    ],
    m5nw0000=[
        69754, 141758, 247965, 307369, 375323, 483330, 560735, 616088, 673692,
        747496
    ])

# KEELE is the companion import script; importing it exposes KEELE.dataset.
import KEELE

import shutil
# Start from a clean slate: remove any previously-built dataset directory.
shutil.rmtree('KEELE_mod', ignore_errors=True)

dataset = jbof.create_dataset('KEELE_mod',
                              {'README': KEELE.dataset.metadata['README']})

# Split each KEELE recording into segments at the boundary samples listed
# in `labels`.
for item in KEELE.dataset.all_items():
    metadata = item.metadata
    pitch = item.pitch
    pitch_sr = int(pitch.metadata['samplerate'])
    laryngograph = item.laryngograph
    laryngograph_sr = int(laryngograph.metadata['samplerate'])
    signal = item.signal
    signal_sr = int(signal.metadata['samplerate'])

    # Segment edges: start of file, each labelled break, end of file.
    breaks = [0, *labels[item.name], len(signal)]

    for idx in range(len(breaks) - 1):
        signal_start = breaks[idx]
        signal_stop = breaks[idx + 1]
        # NOTE(review): snippet is truncated here — the per-segment
        # processing continues below the visible region.