def process(self, event):
    """
    Run the configured command for ``event`` and then forward the event.

    The variable bindings handed to ``self._make_command_sequence`` are the
    static variables overlaid with the items of ``event``, so a key present
    in both takes its value from the event.  The resulting argument sequence
    is run with ``process.subprocess`` and its three results are stored on
    ``self._last_stdout_output``, ``self._last_stderr_output`` and
    ``self._last_command_line``.  When ``self.notify_stream`` is truthy, a
    timestamped note is written to it before and after the command runs.
    Finally ``event`` itself is passed to ``self.send``.

    Raises ValueError if ``event`` is not a dict.
    """
    if not isinstance(event, dict):
        message = "Expected event of type dict, got one of type %s" % (type(event),)
        raise ValueError(message)

    # Work on a copy of the static dict so that the event's bindings win
    # without mutating the static dict itself.
    merged = dict(self._static_variable_dict)
    merged.update(event)
    argv = self._make_command_sequence(merged)

    if self.notify_stream:
        started = '[' + timestamp.get_timestamp() + '] Starting: ' + ' '.join(argv) + '\n'
        self.notify_stream.write(started)

    # Per the original note: this throws with a useful error message if the
    # command can't be run, or if it runs and fails.
    outcome = process.subprocess(argv)
    self._last_stdout_output, self._last_stderr_output, self._last_command_line = outcome

    # Reaching this point means the subprocess finished successfully.
    if self.notify_stream:
        finished = '[' + timestamp.get_timestamp() + '] Finished: ' + ' '.join(argv) + '\n'
        self.notify_stream.write(finished)

    self.send(event)
def process(self, event):
    """
    Run the configured command for ``event`` and then forward the event.

    The variable bindings handed to ``self._make_command_sequence`` are the
    static variables overlaid with the items of ``event``, so a key present
    in both takes its value from the event.  The resulting argument sequence
    is run with ``process.subprocess`` and its three results are stored on
    ``self._last_stdout_output``, ``self._last_stderr_output`` and
    ``self._last_command_line``.  When ``self.notify_stream`` is truthy, a
    timestamped note is written to it before and after the command runs.
    Finally ``event`` itself is passed to ``self.send``.

    Raises ValueError if ``event`` is not a dict.
    """
    if not isinstance(event, dict):
        message = "Expected event of type dict, got one of type %s" % (type(event),)
        raise ValueError(message)

    # Work on a copy of the static dict so that the event's bindings win
    # without mutating the static dict itself.
    merged = dict(self._static_variable_dict)
    merged.update(event)
    argv = self._make_command_sequence(merged)

    if self.notify_stream:
        started = '[' + timestamp.get_timestamp() + '] Starting: ' + ' '.join(argv) + '\n'
        self.notify_stream.write(started)

    # Per the original note: this throws with a useful error message if the
    # command can't be run, or if it runs and fails.
    outcome = process.subprocess(argv)
    self._last_stdout_output, self._last_stderr_output, self._last_command_line = outcome

    # Reaching this point means the subprocess finished successfully.
    if self.notify_stream:
        finished = '[' + timestamp.get_timestamp() + '] Finished: ' + ' '.join(argv) + '\n'
        self.notify_stream.write(finished)

    self.send(event)
def _get_sphere_fd(fileno, file_num_bytes, filename, wave_format, filename_abs):
    """
    Low-level reading of NIST Sphere audio data.

    Reads and parses the NIST_1A text header from the file descriptor
    ``fileno``.  When ``wave_format`` is not None the samples are then
    decoded by running the external ``sph2pipe`` command on ``filename_abs``.
    ``file_num_bytes`` and ``filename`` are not used by this function.

    Returns a tuple ``(file_info, audio_info, wave)``.  ``file_info`` and
    ``audio_info`` are attrdicts built from the header.  ``wave`` is None
    when ``wave_format`` is None; otherwise it is an ndarray of dtype
    ``wave_numpy_formats[wave_format]`` with shape
    ``(audio_num_channels, audio_num_samples)``.

    Raises AudioTypeError if the header does not start with ``NIST_1A``, if
    a header line has fewer than three fields or an unhandled field type, if
    a required field is missing, or if ``sample_n_bytes`` is greater than 2.
    Raises onyx.ExternalError if ``sph2pipe`` writes anything to stderr and
    onyx.DataFormatError if it returns an unexpected number of bytes.

    >>> module_dir, module_name = os.path.split(__file__)

    >>> zero_sph = os.path.join(module_dir, 'zero.sph')
    >>> shorten_sph = os.path.join(module_dir, 'shorten.sph')

    >>> with open(zero_sph, 'rb') as audio_file:
    ...   file_info, audio_info, wave = _get_sphere_fd(audio_file.fileno(), os.path.getsize(zero_sph), zero_sph, 'int16', os.path.abspath(zero_sph))
    >>> for key in sorted(file_info): print "%-24s %r" % (key, file_info[key])
    file_item_bytes          2
    file_item_coding         'int16'
    file_sndfile_extension   'sph'
    file_sndfile_format      ''
    file_sndfile_type        'pcm SPH (NIST Sphere)'
    >>> for key in sorted(audio_info): print "%-24s %r" % (key, audio_info[key])
    audio_num_channels       2
    audio_num_samples        128064
    audio_sample_rate        44100
    >>> print str(wave)
    [[  0  -1  -3 ..., 203 211 199]
     [  0   0  -3 ..., 225 225 221]]

    >>> with open(shorten_sph, 'rb') as audio_file:
    ...   file_info2, audio_info2, wave2 = _get_sphere_fd(audio_file.fileno(), os.path.getsize(shorten_sph), shorten_sph, 'int16', os.path.abspath(shorten_sph))
    >>> for key in sorted(file_info2): print "%-24s %r" % (key, file_info2[key])
    file_item_bytes          2
    file_item_coding         'int16'
    file_sndfile_extension   'sph'
    file_sndfile_format      ''
    file_sndfile_type        'pcm,embedded-shorten-v2.00 SPH (NIST Sphere)'
    >>> for key in sorted(audio_info2): print "%-24s %r" % (key, audio_info2[key])
    audio_num_channels       1
    audio_num_samples        37120
    audio_sample_rate        20000
    >>> print str(wave2)
    [[-1  1  1 ..., -4 -8 -5]]
    """
    assert wave_format is None or wave_format in wave_c_formats

    # As of 2009-04-20 see the following for NIST's underspecified
    # format description:
    #   http://ftp.cwi.nl/audio/NIST-SPHERE
    #   http://www.ldc.upenn.edu/Catalog/docs/LDC93S5/WAV_SPEC.TXT

    nist_1a = 'NIST_1A'
    # The first 128 bytes carry the magic string and the total header size.
    header1 = read_fd_strict(fileno, 128, filename_abs)
    if not header1.startswith(nist_1a):
        raise AudioTypeError(
            "did not find %r in header of purported NIST Sphere file %r" %
            (nist_1a, filename_abs))

    nist, header_size, rest = header1.split(None, 2)
    assert nist == nist_1a
    header_size = int(header_size)
    # Pull in whatever part of the header lies beyond the first read.
    rest += read_fd_strict(fileno, header_size - len(header1), filename_abs)

    # For now, we require the following fields:
    #   sample_count -i 128064
    #   sample_n_bytes -i 2
    #   channel_count -i 2
    #   sample_byte_format -s2 01
    #   sample_rate -i 44100
    #   sample_coding -s3 pcm
    info = builtin.attrdict()
    for line in cStringIO.StringIO(rest):
        parts = line.split()
        # Skip blank lines and comment lines.
        if not parts or parts[0][0] == ';':
            continue
        if parts[0] == 'end_head':
            break
        if len(parts) < 3:
            raise AudioTypeError(
                "expected at least three white-space-separated fields in NIST header line %r in file %r" %
                (line.strip(), filename_abs))

        # maxsplit=2 yields exactly three pieces, with everything after the
        # type (including any trailing ';' comment) kept in field_value.
        # A larger maxsplit makes this unpacking fail on such lines.
        field_name, field_type, field_value = line.split(None, 2)
        if field_type in ('-i', '-r'):
            # Drop a trailing ';' comment before converting the number.
            field_value, _, _ = field_value.partition(';')
            info[field_name] = (int if field_type == '-i' else float)(field_value)
        elif field_type.startswith('-s'):
            # here we do a stricter interpretation of the spec: the string
            # is the str_len characters following single-space separators
            prefix_len = len(field_name + ' ' + field_type + ' ')
            str_len = int(field_type[2:])
            info[field_name] = line[prefix_len:prefix_len + str_len]
        else:
            # Raise an instance; raising a (class, message) tuple loses the
            # message on Python 2 and is a TypeError on Python 3.
            raise AudioTypeError(
                "unhandled field_type %r for field_name %r" %
                (field_type, field_name))

    missing = set(('sample_count', 'sample_n_bytes', 'channel_count',
                   'sample_byte_format', 'sample_rate', 'sample_coding')) - set(info)
    if missing:
        raise AudioTypeError(
            "missing required header fields (%s) in NIST Sphere file %r" %
            (', '.join(sorted(missing)), filename_abs))

    # this is a blunt check against bogus data
    if info.sample_n_bytes > 2:
        raise AudioTypeError(
            "unhandled sample_n_bytes, %d, in NIST Sphere file %r" %
            (info.sample_n_bytes, filename_abs))

    audio_info = builtin.attrdict((('audio_num_channels', info.channel_count),
                                   ('audio_num_samples', info.sample_count),
                                   ('audio_sample_rate', info.sample_rate)))
    check_positive_ints(audio_info, filename_abs)

    file_info = builtin.attrdict((
        ('file_item_bytes', info.sample_n_bytes),
        ('file_item_coding',
         ('int' + str(8 * info.sample_n_bytes))
         if info.sample_coding.lower().startswith('pcm')
         else info.sample_coding[:4].lower()),
        ('file_sndfile_extension', 'sph'),
        ('file_sndfile_format', ''),
        ('file_sndfile_type', info.sample_coding + ' SPH (NIST Sphere)'),
    ))
    check_positive_ints(file_info, filename_abs)

    if wave_format is not None:
        args = 'sph2pipe', '-p', '-f', 'raw', filename_abs
        stdout, stderr, cmd = process.subprocess(args)
        if stderr:
            raise onyx.ExternalError(
                "unexpected stderr from command '%s': '%s'" %
                (cmd, stderr.strip()))

        # The command's output is treated as 16-bit samples regardless of
        # the header's sample_n_bytes.
        num_audio_bytes = (audio_info.audio_num_channels *
                           audio_info.audio_num_samples *
                           numpy.int16().itemsize)
        if len(stdout) != num_audio_bytes:
            raise onyx.DataFormatError(
                "expected %d bytes of audio data, got %d" %
                (num_audio_bytes, len(stdout)))

        # frombuffer gives a read-only view of the bytes; numpy.array below
        # makes the writable copy that the in-place scaling needs.
        wave = numpy.frombuffer(stdout, dtype=numpy.int16)
        assert wave.shape == (audio_info.audio_num_channels * audio_info.audio_num_samples, )

        # reshape it, etc; then construct a new ndarray
        wave = wave.reshape((-1, audio_info.audio_num_channels)).transpose()
        wave = numpy.array(wave, dtype=wave_numpy_formats[wave_format])

        # do scaling the same way libsndfile does
        if wave_format == 'int32':
            wave <<= 16
        elif wave_format in ('float32', 'float64'):
            # 1.0 forces true division: without `from __future__ import
            # division`, Python 2 evaluates 1 / (1 << 15) as 0, which would
            # zero out every sample.
            wave *= (1.0 / (1 << 15))
        assert wave.shape == (audio_info.audio_num_channels, audio_info.audio_num_samples)
    else:
        wave = None

    return file_info, audio_info, wave
def _get_sphere_fd(fileno, file_num_bytes, filename, wave_format, filename_abs):
    """
    Low-level reading of NIST Sphere audio data.

    Reads and parses the NIST_1A text header from the file descriptor
    ``fileno``.  When ``wave_format`` is not None the samples are then
    decoded by running the external ``sph2pipe`` command on ``filename_abs``.
    ``file_num_bytes`` and ``filename`` are not used by this function.

    Returns a tuple ``(file_info, audio_info, wave)``.  ``file_info`` and
    ``audio_info`` are attrdicts built from the header.  ``wave`` is None
    when ``wave_format`` is None; otherwise it is an ndarray of dtype
    ``wave_numpy_formats[wave_format]`` with shape
    ``(audio_num_channels, audio_num_samples)``.

    Raises AudioTypeError if the header does not start with ``NIST_1A``, if
    a header line has fewer than three fields or an unhandled field type, if
    a required field is missing, or if ``sample_n_bytes`` is greater than 2.
    Raises onyx.ExternalError if ``sph2pipe`` writes anything to stderr and
    onyx.DataFormatError if it returns an unexpected number of bytes.

    >>> module_dir, module_name = os.path.split(__file__)

    >>> zero_sph = os.path.join(module_dir, 'zero.sph')
    >>> shorten_sph = os.path.join(module_dir, 'shorten.sph')

    >>> with open(zero_sph, 'rb') as audio_file:
    ...   file_info, audio_info, wave = _get_sphere_fd(audio_file.fileno(), os.path.getsize(zero_sph), zero_sph, 'int16', os.path.abspath(zero_sph))
    >>> for key in sorted(file_info): print "%-24s %r" % (key, file_info[key])
    file_item_bytes          2
    file_item_coding         'int16'
    file_sndfile_extension   'sph'
    file_sndfile_format      ''
    file_sndfile_type        'pcm SPH (NIST Sphere)'
    >>> for key in sorted(audio_info): print "%-24s %r" % (key, audio_info[key])
    audio_num_channels       2
    audio_num_samples        128064
    audio_sample_rate        44100
    >>> print str(wave)
    [[  0  -1  -3 ..., 203 211 199]
     [  0   0  -3 ..., 225 225 221]]

    >>> with open(shorten_sph, 'rb') as audio_file:
    ...   file_info2, audio_info2, wave2 = _get_sphere_fd(audio_file.fileno(), os.path.getsize(shorten_sph), shorten_sph, 'int16', os.path.abspath(shorten_sph))
    >>> for key in sorted(file_info2): print "%-24s %r" % (key, file_info2[key])
    file_item_bytes          2
    file_item_coding         'int16'
    file_sndfile_extension   'sph'
    file_sndfile_format      ''
    file_sndfile_type        'pcm,embedded-shorten-v2.00 SPH (NIST Sphere)'
    >>> for key in sorted(audio_info2): print "%-24s %r" % (key, audio_info2[key])
    audio_num_channels       1
    audio_num_samples        37120
    audio_sample_rate        20000
    >>> print str(wave2)
    [[-1  1  1 ..., -4 -8 -5]]
    """
    assert wave_format is None or wave_format in wave_c_formats

    # As of 2009-04-20 see the following for NIST's underspecified
    # format description:
    #   http://ftp.cwi.nl/audio/NIST-SPHERE
    #   http://www.ldc.upenn.edu/Catalog/docs/LDC93S5/WAV_SPEC.TXT

    nist_1a = 'NIST_1A'
    # The first 128 bytes carry the magic string and the total header size.
    header1 = read_fd_strict(fileno, 128, filename_abs)
    if not header1.startswith(nist_1a):
        raise AudioTypeError(
            "did not find %r in header of purported NIST Sphere file %r" %
            (nist_1a, filename_abs))

    nist, header_size, rest = header1.split(None, 2)
    assert nist == nist_1a
    header_size = int(header_size)
    # Pull in whatever part of the header lies beyond the first read.
    rest += read_fd_strict(fileno, header_size - len(header1), filename_abs)

    # For now, we require the following fields:
    #   sample_count -i 128064
    #   sample_n_bytes -i 2
    #   channel_count -i 2
    #   sample_byte_format -s2 01
    #   sample_rate -i 44100
    #   sample_coding -s3 pcm
    info = builtin.attrdict()
    for line in cStringIO.StringIO(rest):
        parts = line.split()
        # Skip blank lines and comment lines.
        if not parts or parts[0][0] == ';':
            continue
        if parts[0] == 'end_head':
            break
        if len(parts) < 3:
            raise AudioTypeError(
                "expected at least three white-space-separated fields in NIST header line %r in file %r" %
                (line.strip(), filename_abs))

        # maxsplit=2 yields exactly three pieces, with everything after the
        # type (including any trailing ';' comment) kept in field_value.
        # A larger maxsplit makes this unpacking fail on such lines.
        field_name, field_type, field_value = line.split(None, 2)
        if field_type in ('-i', '-r'):
            # Drop a trailing ';' comment before converting the number.
            field_value, _, _ = field_value.partition(';')
            info[field_name] = (int if field_type == '-i' else float)(field_value)
        elif field_type.startswith('-s'):
            # here we do a stricter interpretation of the spec: the string
            # is the str_len characters following single-space separators
            prefix_len = len(field_name + ' ' + field_type + ' ')
            str_len = int(field_type[2:])
            info[field_name] = line[prefix_len:prefix_len + str_len]
        else:
            # Raise an instance; raising a (class, message) tuple loses the
            # message on Python 2 and is a TypeError on Python 3.
            raise AudioTypeError(
                "unhandled field_type %r for field_name %r" %
                (field_type, field_name))

    missing = set(('sample_count', 'sample_n_bytes', 'channel_count',
                   'sample_byte_format', 'sample_rate', 'sample_coding')) - set(info)
    if missing:
        raise AudioTypeError(
            "missing required header fields (%s) in NIST Sphere file %r" %
            (', '.join(sorted(missing)), filename_abs))

    # this is a blunt check against bogus data
    if info.sample_n_bytes > 2:
        raise AudioTypeError(
            "unhandled sample_n_bytes, %d, in NIST Sphere file %r" %
            (info.sample_n_bytes, filename_abs))

    audio_info = builtin.attrdict((('audio_num_channels', info.channel_count),
                                   ('audio_num_samples', info.sample_count),
                                   ('audio_sample_rate', info.sample_rate)))
    check_positive_ints(audio_info, filename_abs)

    file_info = builtin.attrdict((
        ('file_item_bytes', info.sample_n_bytes),
        ('file_item_coding',
         ('int' + str(8 * info.sample_n_bytes))
         if info.sample_coding.lower().startswith('pcm')
         else info.sample_coding[:4].lower()),
        ('file_sndfile_extension', 'sph'),
        ('file_sndfile_format', ''),
        ('file_sndfile_type', info.sample_coding + ' SPH (NIST Sphere)'),
    ))
    check_positive_ints(file_info, filename_abs)

    if wave_format is not None:
        args = 'sph2pipe', '-p', '-f', 'raw', filename_abs
        stdout, stderr, cmd = process.subprocess(args)
        if stderr:
            raise onyx.ExternalError(
                "unexpected stderr from command '%s': '%s'" %
                (cmd, stderr.strip()))

        # The command's output is treated as 16-bit samples regardless of
        # the header's sample_n_bytes.
        num_audio_bytes = (audio_info.audio_num_channels *
                           audio_info.audio_num_samples *
                           numpy.int16().itemsize)
        if len(stdout) != num_audio_bytes:
            raise onyx.DataFormatError(
                "expected %d bytes of audio data, got %d" %
                (num_audio_bytes, len(stdout)))

        # frombuffer gives a read-only view of the bytes; numpy.array below
        # makes the writable copy that the in-place scaling needs.
        wave = numpy.frombuffer(stdout, dtype=numpy.int16)
        assert wave.shape == (audio_info.audio_num_channels * audio_info.audio_num_samples, )

        # reshape it, etc; then construct a new ndarray
        wave = wave.reshape((-1, audio_info.audio_num_channels)).transpose()
        wave = numpy.array(wave, dtype=wave_numpy_formats[wave_format])

        # do scaling the same way libsndfile does
        if wave_format == 'int32':
            wave <<= 16
        elif wave_format in ('float32', 'float64'):
            # 1.0 forces true division: without `from __future__ import
            # division`, Python 2 evaluates 1 / (1 << 15) as 0, which would
            # zero out every sample.
            wave *= (1.0 / (1 << 15))
        assert wave.shape == (audio_info.audio_num_channels, audio_info.audio_num_samples)
    else:
        wave = None

    return file_info, audio_info, wave