Beispiel #1
0
 def initialize(self):
     """Build the job list from ``self.segments`` and prepare the output.

     ``self.segments`` may be:

     * a path to a directory: every file returned by ``get_all_files``
       becomes one segment named after its basename, with start ``0.0``
       and end ``-1.0``;
     * a path to a file: parsed by ``np.genfromtxt`` as space-delimited
       rows of ``name path start end``;
     * a list/tuple of file paths (handled like the directory case, with
       the paths made absolute);
     * a list/tuple of 4-element ``(name, path, start, end)`` records.

     After the call ``self.jobs`` is a list of
     ``(path, [(name, start, end), ...])`` pairs sorted by path,
     ``self.dataset`` wraps ``self.output`` and ``self._temp_path`` points
     at a temporary directory whose previous entries have been removed.

     Raises:
       ValueError: if the segments path does not exist, if ``segments``
         (or its first element) has an unsupported type, or if
         ``self.boundingbox`` is not a dictionary.
       Exception: if a record does not have exactly 4 fields, or if the
         segment names differ from the bounding-box keys.
     """
     # normalise `size` to a pair of ints (per the original note the
     # pair is ordered height, width -- NOTE(review): confirm in as_tuple)
     if self.size is not None:
         self.size = as_tuple(self.size, N=2, t=int)
     segments = self.segments
     video_ext = as_tuple('' if self.video_ext is None else self.video_ext,
                          1, str)
     # ====== load jobs ====== #
     if isinstance(segments, str):
         if not os.path.exists(segments):
             raise ValueError('Path to segments must exists, however, '
                              'exist(segments)={}'.format(
                                  os.path.exists(segments)))
         if os.path.isdir(segments):
             # segment, path, start, end
             file_list = [(os.path.basename(i), i, 0.0, -1.0)
                          for i in get_all_files(segments)]
         else:  # csv file
             file_list = np.genfromtxt(segments, dtype=str, delimiter=' ')
     elif isinstance(segments, (tuple, list)):
         if len(segments) == 0:
             # nothing to process; avoids IndexError on segments[0]
             file_list = []
         elif isinstance(segments[0], str):  # just a list of path to file
             file_list = [(os.path.basename(i), os.path.abspath(i), 0.0,
                           -1.0) for i in segments]
         elif isinstance(segments[0], (tuple, list)):
             if len(segments[0]) != 4:
                 raise Exception(
                     'segments must contain information in following for:'
                     '[name] [path] [start] [end]')
             file_list = segments
         else:
             # previously fell through and failed later with an unbound
             # `file_list`
             raise ValueError(
                 'Elements of `segments` must be path strings or '
                 '(name, path, start, end) records, but given: %s' %
                 str(type(segments[0])))
     else:
         raise ValueError(
             '`segments` must be a path or a list/tuple, but given: %s' %
             str(type(segments)))
     # keep only files whose path ENDS with a supported extension (a
     # substring test would also match the extension inside a directory
     # name); the default '' extension keeps every file
     suffixes = tuple(video_ext)
     file_list = [f for f in file_list if str(f[1]).endswith(suffixes)]
     # convert into: video_path -> [(name, start, end), ...]
     self.jobs = defaultdict(list)
     names = []
     for segment, file, start, end in file_list:
         self.jobs[file].append((segment, float(start), float(end)))
         names.append(segment)
     self.jobs = sorted(self.jobs.items(), key=lambda x: x[0])
     # ====== load bounding box ====== #
     if self.boundingbox is not None:
         if not isinstance(self.boundingbox, dict):
             raise ValueError('Bounding box must be a dictionary')
         # every segment needs exactly one bounding-box entry, and no
         # extra entries are allowed
         if set(names) != set(self.boundingbox.keys()):
             raise Exception(
                 'Segments names and boundingbox keys mismatch.')
     # ====== check output ====== #
     self.dataset = Dataset(self.output)
     self._temp_path = get_tempdir()
     print('Temporary dir created at:', self._temp_path)
     # remove old cache files
     # NOTE(review): os.remove fails on sub-directories; this assumes the
     # temporary directory only ever contains plain files -- confirm.
     for p in os.listdir(self._temp_path):
         os.remove(os.path.join(self._temp_path, p))
Beispiel #2
0
  def load_command(self):
    r""" Warden P. Speech Commands: A public dataset for single-word speech
      recognition, 2017. Available from
      http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz

    Sample rate: 16,000

    The archive is downloaded and extracted into
    `<self.save_path>/speech_commands/audio` whenever the checksum of the
    concatenated `.wav` basenames found there differs from the expected
    one. Files are then split using `validation_list.txt` and
    `testing_list.txt` from the extracted folder; every `.wav` file
    listed in neither (except those under `_background_noise_`) is used
    for training.

    Returns:
      A tuple `(train_files, valid_files, test_files)` of lists of paths.

    Example:
      ds = AudioFeatureLoader(sample_rate=16000,
                              frame_length=int(0.025 * 16000),
                              frame_step=int(0.005 * 16000))
      train, valid, test = ds.load_command()
      train = ds.create_dataset(train, max_length=40, return_path=True)
    """
    LINK = "http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz"
    MD5 = "a08eb256cea8cbb427c6c0035fffd881"
    save_path = os.path.join(self.save_path, 'speech_commands')
    if not os.path.exists(save_path):
      # makedirs also creates missing parents of `self.save_path`
      os.makedirs(save_path)
    audio_path = os.path.join(save_path, 'audio')

    def _list_wav():
      return sorted(
          get_all_files(audio_path, filter_func=lambda x: '.wav' == x[-4:]))

    audio_files = _list_wav()
    md5 = md5_checksum(''.join([os.path.basename(i) for i in audio_files]))
    # ====== Download and extract the data ====== #
    if md5 != MD5:
      zip_path = get_file(fname='speech_commands_v0.01.tar.gz',
                          origin=LINK,
                          outdir=save_path,
                          verbose=True)
      # NOTE(review): the archive is fetched over plain http and extracted
      # without member validation; consider tarfile's extraction filters
      # on Python versions that support them.
      with tarfile.open(zip_path, 'r:gz') as tar:
        tar.extractall(audio_path)
      # the listing above was taken BEFORE extraction; refresh it,
      # otherwise the first call returns stale (typically empty) splits
      audio_files = _list_wav()
    # ====== processing the audio file list ====== #
    audio_files = [i for i in audio_files if '_background_noise_' not in i]
    with open(os.path.join(audio_path, 'validation_list.txt'), 'r') as f:
      valid_list = {i.strip() for i in f}
    with open(os.path.join(audio_path, 'testing_list.txt'), 'r') as f:
      test_list = {i.strip() for i in f}
    train_files = []
    valid_files = []
    test_files = []
    for f in audio_files:
      # the split lists are matched on "<parent folder>/<file name>" with
      # forward slashes, so normalise the platform separator first
      name = '/'.join(f.replace(os.sep, '/').split('/')[-2:])
      if name in valid_list:
        valid_files.append(f)
      elif name in test_list:
        test_files.append(f)
      else:
        train_files.append(f)
    return train_files, valid_files, test_files
Beispiel #3
0
def zip_aes(in_path, out_path, password=None, compression=True, verbose=False):
    r"""Encrypt every file of a folder and store the results in a zip file.

    Each file is written to the archive under its basename, with content
    produced by `encrypt_aes(path, password + name)`. A pickled mapping
    `basename -> md5_checksum(path)` is stored in the extra entry
    `_MD5_CHECKSUM_`.

    Parameters
    ----------
    in_path : string
      path to a folder
    out_path : string
      path to output zip file
    password : string or None
      password used for the encryption; when None it is read
      interactively with `input`
    compression : bool
      use `zipfile.ZIP_DEFLATED` if true, `zipfile.ZIP_STORED` otherwise
    verbose : bool
      print the name and MD5 of every file added to the archive

    Raises
    ------
    ValueError
      if the password is empty, `in_path` is not a folder, or `out_path`
      is not a string
    """
    if password is None:
        password = input("Your password:")
    password = str(password)
    if len(password) == 0:
        raise ValueError("`password`=%s length must be greater than 0" %
                         password)
    # ====== prepare input ====== #
    from odin.utils import get_all_files
    if not os.path.isdir(in_path):
        raise ValueError("`in_path` to %s is not a folder" % str(in_path))
    all_files = get_all_files(in_path)
    # ====== prepare output ====== #
    if not isinstance(out_path, string_types):
        raise ValueError("`out_path` must be string")
    mode = zipfile.ZIP_DEFLATED if bool(compression) else zipfile.ZIP_STORED
    # ====== compression ====== #
    md5_map = {}
    # the context manager closes the archive even if encryption fails
    with zipfile.ZipFile(out_path, 'w', compression=mode,
                         allowZip64=True) as f:
        for path in all_files:
            # NOTE(review): entries are keyed by basename only, so two
            # files with the same name in different sub-folders would
            # collide -- confirm whether `get_all_files` is recursive.
            name = os.path.basename(path)
            md5_map[name] = md5_checksum(path)
            f.writestr(name, encrypt_aes(path, password + name))
            if verbose:
                print('Compressed: "%s"' % name, "(MD5:%s)" % md5_map[name])
        f.writestr('_MD5_CHECKSUM_', pickle.dumps(md5_map))
Beispiel #4
0
    def run(self, overrides=[], ncpu=None, **configs):
        r"""Run the experiment(s) through hydra with the given overrides.

        The method temporarily replaces `Hydra.run` and `Hydra.multirun`
        with local implementations (the multirun one swaps the sweeper's
        launcher for a `ParallelLauncher`), edits `sys.argv` in place and
        then calls `run_hydra`. The original hydra methods are restored
        afterwards, whether or not the run raised.

        Arguments:
          overrides: configuration overrides, converted by `_overrides`
            and appended to `sys.argv`.
          ncpu: number of processes for the parallel launcher; defaults
            to `self.ncpu`. A command line argument containing `ncpu`
            (`ncpu=4` or `ncpu 4`) takes precedence and is removed from
            `sys.argv`.
          **configs: extra overrides given as keywords, converted by
            `_overrides` as well.

        Command line flags `--reset`, `--clear` or `--clean` trigger an
        interactive prompt to clean `self._save_path` (everything except
        the entry named `configs`).

        Returns:
          `self`, after `self.summary()` has been called.

        Example:
          exp = SisuaExperimenter(ncpu=1)
          exp.run(
              overrides={
                  'model': ['sisua', 'dca', 'vae'],
                  'dataset.name': ['cortex', 'pbmc8kly'],
                  'train.verbose': 0,
                  'train.epochs': 2,
                  'train': ['adam'],
              })
        """
        # NOTE: the mutable default is kept for interface compatibility;
        # `overrides` is only read here, never mutated.
        overrides = _overrides(overrides) + _overrides(configs)
        # non-strict configs: per the original note, unknown keys return
        # `None` instead of failing
        strict = False
        command = ' '.join(sys.argv)
        # parse ncpu
        if ncpu is None:
            ncpu = self.ncpu
        ncpu = int(ncpu)
        for idx, arg in enumerate(list(sys.argv)):
            if 'ncpu' in arg:
                if '=' in arg:
                    ncpu = int(arg.split('=')[-1])
                    sys.argv.pop(idx)
                else:
                    # value is the next argument: drop both flag and value
                    ncpu = int(sys.argv[idx + 1])
                    sys.argv.pop(idx)
                    sys.argv.pop(idx)
                break
        # check reset
        for arg in list(sys.argv):
            if arg in ('--reset', '--clear', '--clean'):
                configs_filter = lambda f: 'configs' != f.split('/')[-1]
                if len(
                        get_all_files(self._save_path,
                                      filter_func=configs_filter)) > 0:
                    old_exps = '\n'.join([
                        " - %s" % i for i in os.listdir(self._save_path)
                        if configs_filter(i)
                    ])
                    inp = input("<Enter> to clear all exists experiments:"
                                "\n%s\n'n' to cancel, otherwise continue:" %
                                old_exps)
                    if inp.strip().lower() != 'n':
                        clean_folder(self._save_path,
                                     filter=configs_filter,
                                     verbose=True)
                # remove by value: an index taken from the copy goes stale
                # as soon as an earlier flag has been removed
                sys.argv.remove(arg)
        # check multirun
        is_multirun = any(',' in ovr for ovr in overrides) or \
          any(',' in arg and '=' in arg for arg in sys.argv)
        # write history
        self.write_history(command, "overrides: %s" % str(overrides),
                           "strict: %s" % str(strict), "ncpu: %d" % ncpu,
                           "multirun: %s" % str(is_multirun))
        # generate app help (only referenced by the disabled `--help`
        # customisation below, see the TODO there)
        hlp = '\n\n'.join([
            "%s - %s" % (str(key), ', '.join(sorted(as_tuple(val, t=str))))
            for key, val in dict(self.args_help).items()
        ])

        # NOTE: in the two functions below `self` is the Hydra instance
        # (they are installed as Hydra methods), not this experimenter.
        def _run(self, config_file, task_function, overrides):
            if is_multirun:
                raise RuntimeError(
                    "Performing single run with multiple overrides in hydra "
                    "(use '-m' for multirun): %s" % str(overrides))
            cfg = self.compose_config(config_file=config_file,
                                      overrides=overrides,
                                      strict=strict,
                                      with_log_configuration=True)
            HydraConfig().set_config(cfg)
            return run_job(
                config=cfg,
                task_function=task_function,
                job_dir_key="hydra.run.dir",
                job_subdir_key=None,
            )

        def _multirun(self, config_file, task_function, overrides):
            # Initial config is loaded without strict (individual job configs may have strict).
            from hydra._internal.plugins import Plugins
            cfg = self.compose_config(config_file=config_file,
                                      overrides=overrides,
                                      strict=strict,
                                      with_log_configuration=True)
            HydraConfig().set_config(cfg)
            sweeper = Plugins.instantiate_sweeper(
                config=cfg,
                config_loader=self.config_loader,
                task_function=task_function)
            # override launcher for using multiprocessing
            sweeper.launcher = ParallelLauncher(ncpu=ncpu)
            sweeper.launcher.setup(config=cfg,
                                   config_loader=self.config_loader,
                                   task_function=task_function)
            return sweeper.sweep(arguments=cfg.hydra.overrides.task)

        old_multirun = (Hydra.run, Hydra.multirun)
        Hydra.run = _run
        Hydra.multirun = _multirun

        try:
            # append the new override
            if len(overrides) > 0:
                sys.argv += overrides
            # help for arguments
            if '--help' in sys.argv:
                # sys.argv.append("hydra.help.header='**** %s ****'" %
                #                 self.__class__.__name__)
                # sys.argv.append("hydra.help.template=%s" % (_APP_HELP % hlp))
                # TODO : fix bug here
                pass
            # append the hydra log path
            job_fmt = "/${now:%d%b%y_%H%M%S}"
            sys.argv.insert(
                1, "hydra.run.dir=%s" % self.get_hydra_path() + job_fmt)
            sys.argv.insert(
                1, "hydra.sweep.dir=%s" % self.get_hydra_path() + job_fmt)
            sys.argv.insert(1, "hydra.sweep.subdir=${hydra.job.id}")
            # sys.argv.append(r"hydra.job_logging.formatters.simple.format=" +
            #                 r"[\%(asctime)s][\%(name)s][\%(levelname)s] - \%(message)s")
            args_parser = get_args_parser()
            run_hydra(
                args_parser=args_parser,
                task_function=self._run,
                config_path=self.config_path,
                strict=strict,
            )
        except KeyboardInterrupt:
            sys.exit(-1)
        except SystemExit:
            pass
        finally:
            # always undo the monkey patch, including when the run raised
            # or when the KeyboardInterrupt handler exits the process
            Hydra.run = old_multirun[0]
            Hydra.multirun = old_multirun[1]
        # update the summary
        self.summary()
        return self
Beispiel #5
0
                        help='Override exist models',
                        action='store_true')
    # `-ds`: optional comma-separated list of dataset names used to
    # restrict which jobs are handled (empty string means no restriction)
    parser.add_argument('-ds', default='')
    args = parser.parse_args()
    OVERRIDE = bool(args.override)
    ## create the filter
    if args.ds:
        # names are compared in lower case against job['ds']
        ds = set([str(i).lower() for i in args.ds.split(',')])
        fn_filter = lambda job: job['ds'] in ds
    else:
        fn_filter = lambda job: True
    ## just print some debugging
    # neither --train nor --eval: only report from the existing log files
    if not args.train and not args.eval:
        # each entry is ([<grandparent dir>, <parent dir>], path), sorted
        # by the concatenation of the two directory names
        log = sorted([
            (path.split('/')[-3:-1], path)
            for path in get_all_files(outdir, lambda path: 'log.txt' in path)
        ],
                     key=lambda x: x[0][0] + x[0][1])
        # NOTE(review): this loop rebinds `ds`, which the `fn_filter`
        # lambda above closes over; harmless here because `fn_filter` is
        # only used in the `else` branch below.
        for (ds, model), path in log:
            print(ds, model)
            with open(path, 'r') as f:
                # keep only the LAST line mentioning 'best', dropping its
                # final character and everything up to the last 'at '
                lines = [
                    line[:-1].split('at ')[-1] for line in f.readlines()
                    if 'best' in line
                ][-1:]
                for l in lines:
                    print(' ', l)
    ## run train or evaluation tasks
    else:
        # --eval takes precedence when both flags are given
        main(mode='evaluate' if args.eval else 'train', fn_filter=fn_filter)
Beispiel #6
0
    def __init__(self,
                 segments,
                 output_path,
                 sr=None,
                 win=0.02,
                 shift=0.01,
                 nb_melfilters=24,
                 nb_ceps=12,
                 get_spec=True,
                 get_mspec=False,
                 get_mfcc=False,
                 get_qspec=False,
                 get_phase=False,
                 get_pitch=False,
                 get_vad=True,
                 get_energy=False,
                 get_delta=False,
                 fmin=64,
                 fmax=None,
                 sr_new=None,
                 preemphasis=0.97,
                 pitch_threshold=0.8,
                 pitch_fmax=800,
                 vad_smooth=3,
                 vad_minlen=0.1,
                 cqt_bins=96,
                 pca=True,
                 pca_whiten=False,
                 center=True,
                 audio_ext=None,
                 save_stats=True,
                 substitute_nan=None,
                 dtype='float16',
                 datatype='memmap',
                 ncache=0.12,
                 ncpu=1):
        """Collect the audio jobs and record which features to extract.

        Arguments:
          segments: one of
            * path to a directory: every file found by `get_all_files`
              becomes a segment (name = basename, start 0.0, end -1.0);
            * path to a file: parsed by `np.genfromtxt` as
              space-delimited rows;
            * list/tuple of file paths;
            * list/tuple of records `(name, path, start, end)` or
              `(name, path, start, end, channel)`; channel defaults to 0
              when only 4 fields are given.
          output_path, datatype, pca, pca_whiten, save_stats,
          substitute_nan, ncache, ncpu: forwarded unchanged to the parent
            constructor.
          audio_ext: extension or list of extensions; only files whose
            path ends with one of them are kept (None keeps everything).
          get_spec, get_mspec, get_mfcc, get_qspec, get_phase, get_pitch,
          get_vad, get_energy: flags selecting the features to extract.
          get_delta: stored as an int.
          dtype: dtype recorded for every feature except `vad`
            (`uint8`) and `vadids` (`dict`).
          pitch_threshold: clamped into [0, 1].
          remaining arguments: stored unchanged as attributes of the
            same name.

        Raises:
          ValueError: if the segments path does not exist, or `segments`
            (or its first element) has an unsupported type.
          Exception: if a record has neither 4 nor 5 fields, if no job
            remains after filtering, or if no feature is requested.
        """
        super(SpeechProcessor, self).__init__(output_path=output_path,
                                              datatype=datatype,
                                              pca=pca,
                                              pca_whiten=pca_whiten,
                                              save_stats=save_stats,
                                              substitute_nan=substitute_nan,
                                              ncache=ncache,
                                              ncpu=ncpu)
        audio_ext = as_tuple('' if audio_ext is None else audio_ext,
                             t=string_types)
        # ====== load jobs ====== #
        # NOT loaded segments
        if isinstance(segments, str):
            if not os.path.exists(segments):
                raise ValueError('Path to segments must exists, however, '
                                 'exist(segments)={}'.format(
                                     os.path.exists(segments)))
            # given a directory
            if os.path.isdir(segments):
                # segment, path, start, end
                file_list = [(os.path.basename(i), i, 0.0, -1.0)
                             for i in get_all_files(segments)]
            # given csv file
            else:
                file_list = np.genfromtxt(segments, dtype=str, delimiter=' ')
        # LOADED segments
        elif isinstance(segments, (tuple, list)):
            # empty input: let the "NO jobs found" check below report it
            # instead of failing with IndexError on segments[0]
            if len(segments) == 0:
                file_list = []
            # just a list of path to file
            elif isinstance(segments[0], str):
                file_list = [(os.path.basename(i), os.path.abspath(i), 0.0,
                              -1.0) for i in segments]
            # list of all information
            elif isinstance(segments[0], (tuple, list)):
                if len(segments[0]) != 4 and len(segments[0]) != 5:
                    raise Exception(
                        'segments must contain information in following for:'
                        '[name] [path] [start] [end]')
                file_list = segments
            else:
                # previously fell through and failed later with an
                # unbound `file_list`
                raise ValueError(
                    'Elements of `segments` must be path strings or '
                    'records of 4 or 5 fields, but given: %s' %
                    str(type(segments[0])))
        else:
            raise ValueError(
                '`segments` must be a path or a list/tuple, but given: %s' %
                str(type(segments)))
        # filter using support audio extension (suffix match; the default
        # '' extension keeps every file)
        suffixes = tuple(audio_ext)
        file_list = [f for f in file_list if f[1].endswith(suffixes)]
        # if no channel is provided, append the channel
        file_list = [list(f) + [0] if len(f) == 4 else f for f in file_list]
        self.njobs = len(file_list)
        # convert into: audio_path -> segment(name, start, end, channel)
        self.jobs = defaultdict(list)
        for segment, file, start, end, channel in file_list:
            self.jobs[file].append(
                (segment, float(start), float(end), int(channel)))
        self.jobs = sorted(self.jobs.items(), key=lambda x: x[0])
        # check empty jobs
        if len(self.jobs) == 0:
            raise Exception('NO jobs found for processing.')
        # ====== which features to get ====== #
        # NOTE(review): `get_qspec` and `get_phase` are not part of this
        # check, so requesting only those still raises -- confirm intent.
        if not get_spec and not get_mspec and not get_mfcc \
        and not get_pitch and not get_energy and not get_vad:
            raise Exception('You must specify which features you want: '
                            'spectrogram, filter-banks, MFCC, or pitch.')
        # entries are (feature name, dtype, flag); the flag is False only
        # for the VAD outputs -- its meaning is defined by the consumer of
        # this list, not visible here
        features_properties = []
        if get_mfcc: features_properties.append(('mfcc', dtype, True))
        if get_energy: features_properties.append(('energy', dtype, True))
        if get_spec: features_properties.append(('spec', dtype, True))
        if get_mspec: features_properties.append(('mspec', dtype, True))
        if get_qspec:
            # the q-variants are only registered together with `qspec`
            features_properties.append(('qspec', dtype, True))
            if get_mspec: features_properties.append(('qmspec', dtype, True))
            if get_mfcc: features_properties.append(('qmfcc', dtype, True))
            if get_phase: features_properties.append(('qphase', dtype, True))
        if get_phase: features_properties.append(('phase', dtype, True))
        if get_pitch: features_properties.append(('pitch', dtype, True))
        if get_vad:
            features_properties.append(('vad', 'uint8', False))
            features_properties.append(('vadids', 'dict', False))
        self.__features_properties = features_properties

        self.get_spec = get_spec
        self.get_mspec = get_mspec
        self.get_mfcc = get_mfcc
        self.get_pitch = get_pitch
        self.get_qspec = get_qspec
        self.get_phase = get_phase
        self.get_vad = get_vad
        self.get_energy = get_energy
        self.get_delta = int(get_delta)
        self.primary_indices = ['mfcc']
        # ====== feature information ====== #
        self.sr = sr
        self.win = win
        self.shift = shift
        self.nb_melfilters = nb_melfilters
        self.nb_ceps = nb_ceps
        # constraint pitch threshold in 0-1
        self.pitch_threshold = min(max(pitch_threshold, 0.), 1.)
        self.pitch_fmax = pitch_fmax
        self.vad_smooth = vad_smooth
        self.vad_minlen = vad_minlen
        self.cqt_bins = cqt_bins
        self.fmin = fmin
        self.fmax = fmax
        self.sr_new = sr_new
        self.preemphasis = preemphasis
        self.center = center
# ====== fixed locations ====== #
inpath = args.path
outpath = '/home/trung/data/TIDIGITS_wav'
compress_path = '/home/trung/data/TIDIGITS.zip'
# ====== locations derived from the input path ====== #
wav_path = os.path.join(inpath, "wave")
infopath = os.path.join(inpath, 'data/children/doc/spkrinfo.txt')
logpath = os.path.join(inpath, 'log.txt')
# report every location before the output is redirected to the log
for label, location in (("Input path:       ", inpath),
                        ("Output path:      ", outpath),
                        ("Convert to WAV at:", wav_path),
                        ("Log path:         ", logpath)):
    print(label, ctext(location, 'cyan'))
stdio(logpath)

exts = get_all_ext(inpath)
# keep the .wav files whose third-from-last path component is one of the
# four speaker groups
audio_files = get_all_files(
    inpath,
    filter_func=lambda f: f.endswith('.wav') and f.split('/')[-3] in
    ('girl', 'boy', 'man', 'woman'))
# Columns of the speaker-info table:
#   ID      - unique 2-character speaker identifier
#   Gender  - M-man, W-woman, B-boy, G-girl
#   Age     - speaker age at time of recording
#   Dialect - dialect region identifier (see file "dialects.txt" for decode)
#   Usage   - TST-test material, TRN-training material
info = np.genfromtxt(infopath, dtype=str, skip_header=12)
# lower-cased speaker id -> (lower-cased gender, age, dialect, usage)
info = dict((spk_id.lower(), (gender.lower(), age, dialect, usage))
            for spk_id, gender, age, dialect, usage in info)
gender_map = dict(man="m", woman="w", boy="b", girl="g")
usage_map = dict(TST="test", TRN="train")
Beispiel #8
0
from odin import fuel as F, nnet as N, backend as K, training
from odin.utils import get_all_files, get_datasetpath
from odin.stats import freqcount
from odin.basic import has_roles, WEIGHT, BIAS

# ===========================================================================
# Const
# ===========================================================================
# feature key used by this script: the mel-spectrogram
FEAT = 'mspec'
# fixed seed for numpy's global random generator
np.random.seed(12082518)

# ===========================================================================
# Load wav files
# ===========================================================================
wav_path = F.load_commands_wav()
# count every file under `wav_path` whose path contains '.wav'
_wav_count = len(
    get_all_files(wav_path, filter_func=lambda path: '.wav' in path))
print("Found:", _wav_count, " .wav files")
datapath = get_datasetpath("commands", override=False)
# ====== start preprocessing audio files ====== #
if False:
    speech = F.SpeechProcessor(wav_path,
                               datapath,
                               win=0.025,
                               shift=0.01,
                               nb_melfilters=40,
                               nb_ceps=13,
                               get_spec=True,
                               get_mspec=True,
                               get_mfcc=True,
                               get_qspec=True,
                               get_phase=True,
Beispiel #9
0
# ====== fixed locations ====== #
inpath = args.path
outpath = '/home/trung/data/TIDIGITS_wav'
compress_path = '/home/trung/data/TIDIGITS.zip'
# ====== locations derived from the input path ====== #
wav_path = os.path.join(inpath, "wave")
infopath = os.path.join(inpath, 'data/children/doc/spkrinfo.txt')
logpath = os.path.join(inpath, 'log.txt')
# report every location before the output is redirected to the log
for label, location in (("Input path:       ", inpath),
                        ("Output path:      ", outpath),
                        ("Convert to WAV at:", wav_path),
                        ("Log path:         ", logpath)):
    print(label, ctext(location, 'cyan'))
stdio(logpath)

exts = get_all_ext(inpath)
# keep the .wav files whose third-from-last path component is one of the
# four speaker groups
audio_files = get_all_files(
    inpath,
    filter_func=lambda f: f.endswith('.wav') and f.split('/')[-3] in
    ('girl', 'boy', 'man', 'woman'))
# Columns of the speaker-info table:
#   ID      - unique 2-character speaker identifier
#   Gender  - M-man, W-woman, B-boy, G-girl
#   Age     - speaker age at time of recording
#   Dialect - dialect region identifier (see file "dialects.txt" for decode)
#   Usage   - TST-test material, TRN-training material
info = np.genfromtxt(infopath, dtype=str, skip_header=12)
# lower-cased speaker id -> (lower-cased gender, age, dialect, usage)
info = dict((spk_id.lower(), (gender.lower(), age, dialect, usage))
            for spk_id, gender, age, dialect, usage in info)
gender_map = {
    "man": "m",
    "woman": "w",
    "boy": "b",
    "girl": "g"
Beispiel #10
0
import matplotlib
matplotlib.use('TkAgg')
from matplotlib import pyplot as plt
import seaborn

import numpy as np
import shutil
import os
from odin import fuel as F, utils
from odin.preprocessing import speech
from odin import visual

# presumably the location of the digit wav dataset -- confirm against
# `F.load_digit_wav`
datapath = F.load_digit_wav()
print(datapath)
# every file under `datapath` whose path contains '.wav'
files = utils.get_all_files(datapath, lambda x: '.wav' in x)
# only the first file is used for this demo (raises IndexError when no
# file was found)
y, sr = speech.read(files[0])
print('Raw signal:', y.shape, sr)

feat = speech.speech_features(y,
                              sr,
                              win=0.02,
                              shift=0.01,
                              nb_melfilters=40,
                              nb_ceps=13,
                              get_spec=True,
                              get_mspec=True,
                              get_mfcc=True,
                              get_qspec=True,
                              get_phase=True,
                              get_pitch=True,