Code Example #1
import scikits.audiolab as audio
from numpy import array, reshape, array_split

def write_clip(stimulus, stimtime, output_dir, num_channels, sample_rate, data):
    # Formats for audiolab export: AU or WAV, 16-bit PCM (the source encoding was 'float32')
    format1 = audio.Format(type='au', encoding='pcm16')
    format2 = audio.Format(type='wav', encoding='pcm16')

    # Single File output -- Needs tweaking to work right
    if stimulus == 'sound':
        nameSingle = output_dir + '/' + str(stimtime.secs) + '.' + str(stimtime.nsecs) + '_' + stimulus + '_All.wav'
        single_file = audio.Sndfile(nameSingle, 'w', format2, num_channels, sample_rate)
    
    file_name = output_dir + '/' + str(stimtime.secs) + '.' + str(stimtime.nsecs) + '_' + stimulus + '_Channel-1.wav'
    sound_file = audio.Sndfile(file_name, 'w', format2, 1, sample_rate)
    
    # Create list of sound file entities with unique channel names
    # Each channel output separately
##    channel_files = []
##    for channel in range(0,num_channels):
##        name = output_dir + '/' + str(stimtime.secs) + '.' + str(stimtime.nsecs) + '_' + stimulus + '_Channel-%d.wav' % (channel+1)
##        channel_files.append(audio.Sndfile(name, 'w', format2, 1, sample_rate))

    # Manipulate the data to each individual sound file
    for line in data:
        soundarray = array(line) # shape: (16384,)
        soundarray = reshape(soundarray, (len(soundarray)/num_channels, num_channels)) # shape: (4096, 4)
        #single_file.write_frames(soundarray)
        soundarray = array_split(soundarray, num_channels, axis=1) # splits into list with 4 arrays of shape (4096, 1)
        sound_file.write_frames(soundarray[0])
##        for channel in range(0,num_channels):
##            channel_files[channel].write_frames(soundarray[channel])

    # Finish the file: flush it to disk and release the handle
    sound_file.sync()
    sound_file.close()
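The reshape/array_split de-interleaving in the loop above can be sanity-checked in isolation. A small sketch with four interleaved channels (the numbers are illustrative, not from the project):

import numpy as np

interleaved = np.arange(16)                   # frame order: c0 c1 c2 c3 c0 c1 ...
frames = np.reshape(interleaved, (4, 4))      # (nframes, num_channels)
channels = np.array_split(frames, 4, axis=1)  # list of 4 arrays, each of shape (4, 1)
print channels[0].ravel()                     # -> [ 0  4  8 12], i.e. channel 0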
Code Example #2
def write(self, filename, encoding="pcm16", endianness="file", fileformat=None):
    if not fileformat:
        fileformat = os.path.basename(filename).split(".")[-1]
    format = AL.Format(fileformat, encoding, endianness)
    f = AL.Sndfile(filename, "w", format, self.channels, self.samplerate)
    f.write_frames(self.samples)
    f.close()
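Note that the method above also relies on `import os` for `os.path.basename`. A standalone sketch of the same pattern, with an illustrative `write_samples` helper and an A440 test tone (neither appears in the original project):

import os
import numpy as np
import scikits.audiolab as AL

def write_samples(filename, samples, samplerate, encoding="pcm16"):
    # infer the container format from the file extension, as the method above does
    fileformat = os.path.basename(filename).split(".")[-1]
    channels = samples.shape[1] if samples.ndim == 2 else 1
    f = AL.Sndfile(filename, "w", AL.Format(fileformat, encoding), channels, samplerate)
    f.write_frames(samples)
    f.close()

t = np.linspace(0.0, 1.0, 44100, endpoint=False)
write_samples("tone.wav", 0.5 * np.sin(2 * np.pi * 440 * t), 44100)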
Code Example #3
def process_wavfiles(dirname, prepend_dir, audiotype, speaker_name, spk_id, db,
                     prompts, dbfile):

    dirlist = os.listdir(prepend_dir + dirname + '/' + audiotype + '/')
    wavfile_ids = []

    for fname in dirlist:
        try:
            sfile = audio.Sndfile(
                prepend_dir + dirname + '/' + audiotype + '/' + fname, 'r')
        except Exception:
            print prepend_dir, dirname, audiotype, fname
            assert False, "could not open soundfile"
        #print sfile.nframes, sfile.encoding, sfile.format, sfile.samplerate, sfile.channels, sfile.file_format
        #sndvec = sfile.read_frames(sfile.nframes)
        #sndvec_comp = sndvec.tolist()
        sndfile = open(prepend_dir + dirname + '/' + audiotype + '/' + fname,
                       'rb')  # binary mode: the raw bytes become the record's data

        #print np.size(sndvec,0), np.size(sndvec,1)
        #audio.play(sndvec, sfile.samplerate)

        if fname.split('.')[0] in prompts.keys():
            insert_prompt = prompts[fname.split('.')[0]]
        else:
            insert_prompt = ''

        #start = time.time()
        #try:
        #audio_id = db.audio.insert({'frames': sfile.nframes, 'encoding': sfile.encoding,
        #                            'rate': sfile.samplerate, 'data': oid,
        #                            'ch': sfile.channels, 'speaker': spk_id, #'type': sndvec.dtype.name,
        #                            'prompt': insert_prompt, 'filename': fname.split('.')[0], 'speaker_name': speaker_name})

        #except pymongo.errors.AutoReconnect:
        #    print db.error()
        #    print db.last_status()
        audio_rec = Audio()
        audio_rec.frames = sfile.nframes
        audio_rec.encoding = fname.split('.')[1]
        audio_rec.rate = sfile.samplerate
        audio_rec.data = sndfile
        audio_rec.data.content_type = 'audio/' + audiotype
        audio_rec.ch = sfile.channels
        audio_rec.speaker = spk_id
        audio_rec.prompt = insert_prompt
        audio_rec.filename = fname.split('.')[0]
        audio_rec.speaker_name = speaker_name

        try:
            audio_rec.save(safe=True)
            audio_id = audio_rec.id
        except OperationError:
            assert False, "Insert failed"

        if audio_id:
            wavfile_ids.append(audio_id)
        else:
            assert False, "Error on insert"
    return wavfile_ids
Code Example #4
File: vad_eval.py  Project: AliceWu5/vad-3
def read_soundfile(filename):
    soundfile = al.Sndfile(filename, 'r')
    signal = soundfile.read_frames(soundfile.nframes)
    if soundfile.channels == 1:
        return signal, soundfile.samplerate
    else:
        return signal[:, 0], soundfile.samplerate
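A minimal sketch of calling this helper (the filename is illustrative):

signal, rate = read_soundfile('speech.wav')
print "read %d mono samples at %d Hz" % (len(signal), rate)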
Code Example #5
File: gene.py  Project: mrG7/GeneticSynthesis
def to_sound_file(self, c, filepath):
    output_file = audiolab.Sndfile(
        filepath, 'w', audiolab.Format(type='aiff',
                                       encoding=self.encoding),
        self.channels, self.samplerate)
    output_file.write_frames(self.to_numpy(c))
    output_file.close()
Code Example #6
    def __iter__(self):
        if isinstance(self.filename, str):
            f = audiolab.Sndfile(self.filename)
        elif np.iterable(self.filename):
            f = MockSndfile(self.filename, samplerate=44100)
        else:
            raise ValueError, 'Invalid filename: %s' % self.filename

        nbuf = self.nbuf
        end = self.end
        if not end:
            end = f.nframes
        if not nbuf:
            nbuf = 10*f.samplerate

        pos = f.seek(self.start)
        nremaining = end - pos
        while nremaining > 0:
            if nremaining < nbuf:
                nbuf = nremaining
            try:
                yield f.read_frames(nbuf)
                nremaining -= nbuf 
            except RuntimeError:
                nremaining = 0
        f.close()
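The same buffered-read pattern works as a standalone generator for plain files; a sketch dropping the MockSndfile branch (`iter_frames` and the filename are illustrative names):

import scikits.audiolab as audiolab

def iter_frames(filename, nbuf=None, start=0, end=None):
    # yield successive buffers of frames from a sound file
    f = audiolab.Sndfile(filename)
    if end is None:
        end = f.nframes
    if nbuf is None:
        nbuf = 10 * f.samplerate  # default: ten seconds of audio per buffer
    pos = f.seek(start)
    nremaining = end - pos
    while nremaining > 0:
        if nremaining < nbuf:
            nbuf = nremaining
        try:
            yield f.read_frames(nbuf)
            nremaining -= nbuf
        except RuntimeError:
            # a failed read (e.g. a broken header) ends the iteration
            nremaining = 0
    f.close()

for buf in iter_frames('speech.wav'):
    print buf.shape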
Code Example #7
File: icapp.py  Project: afcarl/icapp
def make_output_files(inputFilenames, outputdir, auformat, samplerate):
    logging.debug("Making output files in directory %s" % outputdir)
    outputFiles = []
    for infile in inputFilenames:
        ofn = "%s/%s" % (outputdir, os.path.basename(infile))
        outputFiles.append(al.Sndfile(ofn, 'w', auformat, 1, samplerate))
    return outputFiles
Code Example #8
def get_audio(filepath, seg_start, seg_duration, targetfs=None, verbose=True):
    """ for use only with wav files from rwc database """

    # rewriting using scikits.audiolab
    import scikits.audiolab as audiolab
    # small hack search for alternate
    if not op.exists(filepath):
        filepath = op.splitext(filepath)[0] + '.wav'
    if not op.exists(filepath):
        filepath = op.splitext(filepath)[0] + '.WAV'
    sndfile = audiolab.Sndfile(filepath, 'r')
    fs = sndfile.samplerate
    (n, c) = (sndfile.nframes, sndfile.channels)
    if verbose: print "Reading"
    #  initalize  position
    sndfile.seek(int(seg_start * fs), 0, 'r')
    audiodata = sndfile.read_frames(int(seg_duration * fs))

    sndfile.close()
    if verbose: print "Done"
    if targetfs is not None and not (targetfs == fs):
        if verbose: print "Resampling"
        sig = Signal(audiodata, fs)
        sig.resample(targetfs)
        audiodata = sig.data
        fs = targetfs
        if verbose: print "Done"
    return audiodata, fs
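For example, pulling two seconds of audio starting at 1.5 s and resampling to 8 kHz (the path is illustrative; the resampling branch assumes the `Signal` class used above is importable):

audiodata, fs = get_audio('rwc/track01.wav', 1.5, 2.0, targetfs=8000)
print audiodata.shape, fs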
Code Example #9
def get_max_level(filename):
    max_value = 0
    buffer_size = 4096
    audio_file = audiolab.Sndfile(filename, 'r')
    n_samples_left = audio_file.nframes

    while n_samples_left:
        to_read = min(buffer_size, n_samples_left)

        try:
            samples = audio_file.read_frames(to_read)
        except RuntimeError:
            # this can happen with a broken header
            break

        # convert to mono by selecting left channel only
        if audio_file.channels > 1:
            samples = samples[:, 0]

        max_value = max(max_value, numpy.abs(samples).max())

        n_samples_left -= to_read

    audio_file.close()

    return max_value
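A typical use is computing a normalization gain before rendering a waveform; a short sketch (the filename is illustrative):

peak = get_max_level('input.wav')
gain = 1.0 / peak if peak > 0 else 1.0
print "peak %.4f -> gain %.4f" % (peak, gain)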
Code Example #10
def get_audio_data(self, basedir=TIMIT_DIR):
    import scikits.audiolab as al
    filename = os.path.join(basedir, self.usage, self.dialect,
                            self.sex + self.speaker_id,
                            self.sentence_id + '.wav')
    f = al.Sndfile(filename, 'r')
    data = f.read_frames(f.nframes, dtype=np.float64)
    return data[self.start:self.stop]
Code Example #11
File: audio.py  Project: ivanov/imagen
    def _load_audio_file(self):
        source = audiolab.Sndfile(self.filename, 'r')

        # audiolab automatically scales by the bit depth, so the dynamic range is [-1.0, 1.0];
        # we rescale it to the range [0.0, 1.0]
        self.time_series = (
            source.read_frames(source.nframes, dtype=self.precision) + 1) / 2
        self.sample_rate = source.samplerate
Code Example #12
def read(self, filename, dtype=np.float64):
    f = AL.Sndfile(filename, 'r')
    self.samplerate = f.samplerate
    self.samples = f.read_frames(f.nframes, dtype=dtype)
    if len(self.samples.shape) == 2:
        self.channels = self.samples.shape[1]
    else:
        self.channels = 1
    f.close()
Code Example #13
File: samples.py  Project: musictheory/piano.js
def write_file(path, frames, rate=SAMPLE_RATE):
    format = audiolab.Format("wav", "pcm16")
    sndfile = audiolab.Sndfile(path,
                               "w",
                               format=format,
                               channels=len(frames.shape),
                               samplerate=rate)
    sndfile.write_frames(frames)
    sndfile.sync()
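A sketch of driving `write_file` with a generated mono signal, assuming `SAMPLE_RATE` is 44100 as elsewhere in this project. Note that `channels=len(frames.shape)` maps a 1-D array to mono and a 2-D `(nframes, nchannels)` array to multichannel:

import numpy as np

frames = 0.5 * np.sin(2 * np.pi * 440 * np.arange(44100) / 44100.0)
write_file('a440.wav', frames)  # frames.shape == (44100,), so one channel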
Code Example #14
def get_framer_audio_audiolab(filename, size, hop):
    from scikits import audiolab

    loader = audiolab.Sndfile(filename)
    sr = loader.samplerate
    nframes = loader.nframes
    nchannels = loader.channels

    framer = framer_audio_audiolab(loader, size, hop)

    return framer, sr, int(math.ceil(float(nframes) / hop)), nchannels, loader
Code Example #15
def write_clip(stimulus, stimtime, dirname, num_channels, sample_rate, data):
    format = audio.Format(type='au', encoding='float32')
    name = dirname + '/' + str(stimtime.secs) + '.' + str(
        stimtime.nsecs) + '_' + stimulus + '.au'
    print name
    soundfile = audio.Sndfile(name, 'w', format, num_channels, sample_rate)
    for d in data:
        sndarray = array(d)
        sndarray = reshape(sndarray,
                           (len(sndarray) / num_channels, num_channels))
        soundfile.write_frames(sndarray)
    soundfile.sync()
Code Example #16
def read_file(path):
    if al is not None:
        soundfile = al.Sndfile(path, 'r')
        return soundfile.read_frames(soundfile.nframes)
    else:
        try:
            print("Warning: no audiolab. Trying to read WAV: " + path)
            wav = wavfile.read(path)[1]
            wav = np.float64(wav) / np.iinfo(np.int16).max
            return wav
        except ValueError:
            return None
Code Example #17
File: gene.py  Project: mrG7/GeneticSynthesis
    def __init__(self, source_file, target_file):
        sf = audiolab.Sndfile(source_file)
        self.encoding = sf.encoding
        self.channels = 1  # We only load first channel
        self.samplerate = sf.samplerate
        sf.close()

        self.source = load_sound_file(source_file)
        self.target = load_sound_file(target_file)
        self.split_source = []
        for split_range in num_zero_crossings_per_chunk:
            print split_range
            self.split_source += split_by_zero_crossings(
                self.source, split_range)
        self.max_gene = len(self.split_source) - 1
        self.ideal_length = len(self.target)
Code Example #18
def audio_test():
    aud = Audio.objects.first()

    print 'Frames: %i' % aud.frames
    print 'Encoding: %s' % aud.encoding
    print 'Rate: %i' % aud.rate
    print 'Channels: %i' % aud.ch
    print 'Speaker: %s' % Speaker.objects.with_id(aud.speaker).name
    print 'Filename: %s' % aud.filename
    print 'Speaker Name: %s' % aud.speaker_name

    fname_out = self_extract(aud.data, aud.filename + '.' + aud.encoding)
    sfile = audio.Sndfile(fname_out, 'r')
    sndvec = sfile.read_frames(aud.frames)

    audio.play(sndvec, aud.rate)
Code Example #19
    def __init__(self, input_filename, fft_size, window_function=numpy.hanning):
        max_level = get_max_level(input_filename)

        self.audio_file = audiolab.Sndfile(input_filename, 'r')
        self.fft_size = fft_size
        self.window = window_function(self.fft_size)
        self.spectrum_range = None
        self.lower = 100
        self.higher = 22050
        self.lower_log = math.log10(self.lower)
        self.higher_log = math.log10(self.higher)
        self.clip = lambda val, low, high: min(high, max(low, val))

        # determine the maximum possible FFT magnitude by taking the FFT of a DC signal
        fft = numpy.fft.rfft(numpy.ones(fft_size) * self.window)
        max_fft = (numpy.abs(fft)).max()
        # set the scale to normalized audio and normalized FFT
        self.scale = 1.0/max_level/max_fft if max_level > 0 else 1
Code Example #20
File: samples.py  Project: musictheory/piano.js
def read_file(path, duration=None, rate=SAMPLE_RATE):
    wav_file = path + ".wav"
    m4a_file = path + ".m4a"

    if not os.path.exists(wav_file):
        subprocess.call(
            "/usr/bin/afconvert -f WAVE -d LEI16@44100 {0} {1}".format(
                m4a_file, wav_file),
            shell=True)

    sndfile = audiolab.Sndfile(wav_file, samplerate=rate)

    nframes = sndfile.nframes

    if duration is not None:
        nframes = int(duration * rate)

    return sndfile.read_frames(nframes, dtype=np.float64)
Code Example #21
def gen(chain, filepath):
    chain_depth = chain["chain_depth"]
    bin_size = chain["bin_size"]
    # Reset the bucket index to its starting value
    bucket_index = 0

    num_buckets = 2 / bin_size + 1  # The total number of buckets, including one special bucket for before the start of the audio.
    for i in range(chain_depth):
        bucket_index = bucket_index * num_buckets + num_buckets - 1

    new_data = np.empty([chain["samplerate"] * 15])  # 15 seconds of sound
    try:
        for i in range(new_data.size):
            if i % (5 * chain["samplerate"]) == 0:  # print progress every 5 seconds
                print i / chain["samplerate"]
            tc = chain[bucket_index]
            new_datum = 0

            #print tc
            weighted_index = random.randrange(tc["sum"])
            for k in tc.keys():
                if k not in ["sum", "samplerate", "bin_size", "chain_depth"]:
                    if weighted_index < tc[k]:
                        new_datum = k
                        break
                    else:
                        weighted_index -= tc[k]
            new_data[i] = new_datum
            #print new_data[0:i]

            # Compute the bucket_index for the next iteration
            val = (new_data[i] - new_data[i] % bin_size) / bin_size + 1 / bin_size
            print val
            bucket_index = int((bucket_index * num_buckets + val) %
                               (num_buckets**chain_depth))
    finally:
        format = audiolab.Format('wav')
        f3 = audiolab.Sndfile(filepath, 'w', format, 1, chain["samplerate"])
        f3.write_frames(new_data)
        f3.close()
Code Example #22
def create_data(f, s):

    # Write orange tab file statistical information:
    f.write('filename\tHZCRR\tLSTER\tSF\tspeech-music\n')
    f.write('discrete\tcontinuous\tcontinuous\tcontinuous\tspeech music\n')
    f.write('ignore\t\t\t\tclass\n')

    # for each file in the directory specified with s,
    for infile in glob.glob('data/' + s + '/*.wav'):
        file, ext = os.path.splitext(infile)
        aud = al.Sndfile(infile, 'r')
        aud = (aud, file[-4:] + ext)
        if aud[1][:2] == 'sp':
            audioClass = 'speech'
        else:
            audioClass = 'music'
        data.append(aud)  # add the (audio file, name) tuple to the list
        featTup = evaluate(aud)  # compute the feature tuple for the file
        # Write the results to the appropriate .tab file
        f.write(featTup[0] + '\t' + str(featTup[1]) + '\t' + str(featTup[2]) +
                '\t' + str(featTup[3]) + '\t' + audioClass + '\n')
Code Example #23
def get_spectrogram(filename, fft_length):
    fp = audiolab.Sndfile(filename, 'r')
    sample_rate = fp.samplerate
    total_num_samps = fp.nframes

    num_fft = (total_num_samps / fft_length ) - 2
    # create temporary working array
    fft_buckets = np.zeros((num_fft, fft_length), float)
    channels = fp.channels

    # read in the data from the file
    for i in range(num_fft):
        frames = fp.read_frames(fft_length)
        if channels == 2:
            # TODO: figure out how to combine channels appropriately
            fft_buckets[i,:] = frames[:,0] - 128.0
        elif channels == 1:
            fft_buckets[i,:] = frames - 128.0
        else:
            raise Exception("Unsupported # of channels: %d" % channels)

    # Window the data
    fft_buckets = fft_buckets * np.hamming(fft_length)

    # Transform with the FFT, Return Power
    freq_pwr  = 10*np.log10(1e-20 + abs(rfft(fft_buckets, fft_length)))

    # Plot the result
    n_out_pts = (fft_length / 2) + 1
    y_axis_hz = 0.5 * float(sample_rate) / n_out_pts * np.arange(n_out_pts)
    y_axis = y_axis_hz / 1000
    x_axis = (total_num_samps / float(sample_rate)) / num_fft * np.arange(num_fft)
    plt.xlabel('Time (sec)')
    plt.ylabel('Frequency (kHz)')

    plt.pcolormesh(x_axis, y_axis, freq_pwr.transpose())
    plt.xlim([0,x_axis.max()])
    plt.ylim([0,y_axis.max()])
    plt.colorbar()
    plt.show()
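The function relies on several module-level imports that the snippet does not show; a minimal sketch of the context needed to run it (the filename is illustrative):

import numpy as np
import matplotlib.pyplot as plt
import scikits.audiolab as audiolab
from numpy.fft import rfft

get_spectrogram('speech.wav', 512)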
Code Example #24
def run():
    # NOTE: Replace this with the name of the test example to perform SS on
    test_name = "grace_short"
    wf = skal.Sndfile("%s\\..\\..\\test\\%s.wav" % (SCRIPT_PATH, test_name), "r")
    p = pa.PyAudio()

    width = int(wf.encoding[-2:])/8
    fs = wf.samplerate

    # NOTE: Replace this with the names of the instruments that are in the audio
    user_instr_names = ['piano', 'trumpet']



    stream = p.open(format=p.get_format_from_width(4),
                    channels=1,
                    rate=wf.samplerate,
                    output=True)

    with open(MODEL_PATH, 'rb') as template_file:
        templates = cPickle.load(template_file)
        instr_to_cols = dict()

        cols = []

        idx = 0
        for instr in user_instr_names:
            instr_templates = templates[instr]
            L = 0
            # For now, we're just discarding the pitch info
            for feature, chroma, octave in instr_templates:
                cols.append(feature)
                L += feature.shape[1]
            instr_to_cols[instr] = (idx, idx + L)
            idx += L

        W = np.concatenate(cols, axis=1)


    #encoded_frames = dict(zip(user_instr_names, [[] for _ in range(len(user_instr_names))]))
    raw_frames = dict(zip(user_instr_names, [[] for _ in range(len(user_instr_names))]))
    divergence = []

    # NOTE: Change this to use the adaptive algorithm or not
    adaptive = False

    threshold = 3
    src_sep = plca_learn.AdaptiveSourceSeparator(W, user_instr_names, instr_to_cols, threshold, fs, CHUNK_SIZE, adaptive)

    i = 0
    frames_left = wf.nframes
    while frames_left > 0:
        frames_requested = min(CHUNK_SIZE, frames_left)
        data = wf.read_frames(frames_requested, dtype=np.float32)
        processed_frames, div = src_sep.process_segment(data)
        for instr in processed_frames:
            raw_source = processed_frames[instr]
            #encoded_frames[instr].append(encoded_source)
            raw_frames[instr].append(raw_source)
        divergence.append(div)
        i += 1
        frames_left -= frames_requested
        print "========= Frames Completed: %d/%d ==========" % (wf.nframes - frames_left, wf.nframes)

    print "Done processing frames."

    stream.stop_stream()
    stream.close()

    p.terminate()

    format = skal.Format('wav')
    mode_desc = "-adapt" if adaptive else "-noadapt"
    for instr, audio_data in raw_frames.items():
        output = skal.Sndfile("%s\\..\\..\\%s-separated-%s%s.wav" % (SCRIPT_PATH, test_name, instr, mode_desc),\
                'w', format, 1, fs)

        for frame in audio_data:
            output.write_frames(frame)
        output.close()

    wf.close()

    with open("%s\\..\\..\\divergence_results-%s%s.txt" % (SCRIPT_PATH, test_name, mode_desc), 'w+') as f:
        f.write("Divergence Results\n")
        f.write("====================\n\n")
        for i, div in enumerate(divergence):
            f.write("    -- Segment %d: %f\n" % (i, div))
Code Example #25
def create_data(sig, label):
    mfcc = get_mfcc(sig, freq, winstep=window_step, winlen=window_size, nfft=2048, lowfreq=lowfreq,
                    highfreq=highfreq, numcep=size, nfilt=size+2)
    num_label = label_dic[label]*np.ones(len(mfcc))
    time_per_occurrence_class[label_dic[label]].append((stop - start) / (10.0 ** 7))
    return mfcc, num_label

####### Main Loop ##########
for i in xrange(len(file_list)):
    lab_name = file_list[i] # os.path.split(os.path.join(wav_dir,file_list[i]))[1]
    if '~' in lab_name:
        continue
    with open(os.path.join(cur_dir, file_list[i]), 'r') as f:
        lines = f.readlines()
        wave_name = os.path.join(wav_dir, lab_name[:-4]+'.wav')
        # use a distinct name so the Sndfile does not shadow the label file handle `f`
        sndfile = audlab.Sndfile(wave_name, 'r')
        freq = sndfile.samplerate
        nframes = sndfile.nframes
        frames_recovered = 0
        for j in xrange(len(lines)):
            try:
                cur_line = lines[j].split()
                start = int(cur_line[0])
                stop = int(cur_line[1])
                label = cur_line[2]
                length = stop / 10.0 ** 7 - start / 10.0 ** 7
                audio = sndfile.read_frames(int(np.floor(freq * length)))
                if label in label_dic:
                    signal = audio  # audio/math.sqrt(energy)
                    time = np.sum(time_per_occurrence_class[label_dic[label]])
                    if time < threshold:
Code Example #26
import sys
import numpy
import scikits.audiolab as audio
import matplotlib.pyplot as plt
import random

def frequencyFilter(signal):
    print "Len signal:", len(signal)
    starting_freq = 0
    ending_freq = 190000  # len(signal)
    for i in range(0, len(signal)):
        signal[i] *= 2
        if starting_freq < i < ending_freq:
            signal[i] = 0

def processWithNumpy(signal):
    transformedSignal = numpy.fft.fft(signal)
    frequencyFilter(transformedSignal)

    # take the real part explicitly; ifft returns a complex array
    cleanedSignal = numpy.fft.ifft(transformedSignal)
    return numpy.asarray(cleanedSignal.real, dtype=numpy.float64)

# Must be wav files.
infile = sys.argv[1]
outfile = sys.argv[2]

(inputSignal, samplingRate, bits) = audio.wavread(infile)
outputSignal = processWithNumpy(inputSignal)

outputFile = audio.Sndfile(outfile, 'w', audio.Format('wav'), 1, samplingRate)
outputFile.write_frames(outputSignal)
outputFile.close()
Code Example #27
File: audio.py  Project: ivanov/imagen
    def extract_specific_interval(self, interval_start, interval_end):
        """
        Overload if special behaviour is required when a series ends.
        """

        interval_start = int(interval_start)
        interval_end = int(interval_end)

        if interval_start >= interval_end:
            raise ValueError(
                "Requested interval's start point is past the requested end point."
            )

        elif interval_start > self.time_series.size:
            if self.repeat:
                interval_end = interval_end - interval_start
                interval_start = 0
            else:
                raise ValueError(
                    "Requested interval's start point is past the end of the time series."
                )

        if interval_end < self.time_series.size:
            interval = self.time_series[interval_start:interval_end]

        else:
            requested_interval_size = interval_end - interval_start
            remaining_signal = self.time_series[interval_start:self.time_series.size]

            if self.next_file == len(self.sound_files) and self.repeat:
                self.next_file = 0

            if self.next_file < len(self.sound_files):
                next_source = audiolab.Sndfile(
                    self.sound_files[self.next_file], 'r')
                self.next_file += 1

                if next_source.samplerate != self.sample_rate:
                    raise ValueError(
                        "All sound files must be of the same sample rate")

                if self.gap_between_sounds > 0:
                    remaining_signal = hstack(
                        (remaining_signal,
                         zeros(int(self.gap_between_sounds * self.sample_rate),
                               dtype=self.precision)))

                self.time_series = hstack(
                    (remaining_signal,
                     next_source.read_frames(next_source.nframes,
                                             dtype=self.precision)))

                interval = self.time_series[0:requested_interval_size]
                self._next_interval_start = requested_interval_size

            else:
                self.warning("Returning last interval of the time series.")
                self._next_interval_start = self.time_series.size + 1

                samples_per_interval = self.interval_length * self.sample_rate
                interval = hstack(
                    (remaining_signal,
                     zeros(samples_per_interval - remaining_signal.size)))

        return interval
Code Example #28
# sample rate expected by the Google Speech API
fr = 16000.

# use audiolab to read the wav file
Signal, fs = audiolab.wavread(File)[:2]

# resample the original signal to 16000 Hz ('sinc_best' is the highest-quality converter)
Signal = resample(Signal, fr/float(fs), 'sinc_best')

# resampling with scipy would also work, but is a bit slower:
#Signal=scipy.signal.resample(Signal,int(round(len(Getsignal)*fr)/float(fs)),window=None)

# output file format
fmt         = audiolab.Format('flac', 'pcm16')
nchannels   = 1

# create the .flac file (the sample rate must be an integer)
afile = audiolab.Sndfile(FileNameTmp, 'w', fmt, nchannels, int(fr))

# write the resampled signal and flush it to disk before uploading
afile.write_frames(Signal)
afile.close()

# send the .flac file to the Google Speech API
url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&lang=pt-BR"
flac=open(FileNameTmp,"rb").read()
header = {'Content-Type' : 'audio/x-flac; rate=16000'}
req = urllib2.Request(url, flac, header)
data = urllib2.urlopen(req)
print data.read()
remove(FileNameTmp)
Code Example #29
import sys
import numpy as np
import scikits.audiolab as audiolab
from keras.models import Sequential
from keras.layers.core import Activation, Dense
from keras.layers.recurrent import LSTM
from keras.layers.embeddings import Embedding
from keras.utils import np_utils, generic_utils

f = audiolab.Sndfile('05 Woodstock.aif', 'r')

seq_len = 50
buckets = 2048
hidden_layer_size = 256
batch_size = 128
np_epoch = 10
output_n_frames = 44100*1

data = ((f.read_frames(f.nframes)[:,0] + 1.0)*(buckets/2))
data = data.astype(int)
sampling_rate = f.samplerate
nframes = f.nframes

f.close()

model = Sequential()
model.add(Embedding(buckets, hidden_layer_size, input_length=seq_len))
model.add(LSTM(output_dim=hidden_layer_size, activation="sigmoid", inner_activation="hard_sigmoid", init="uniform"))
model.add(Dense(input_dim=hidden_layer_size, output_dim=buckets, init="glorot_uniform"))
model.add(Activation("softmax"))
Code Example #30
"""
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
BSD License
"""
import numpy as np
import scikits.audiolab as audiolab

f = audiolab.Sndfile('05 Woodstock.aif', 'r')

data = f.read_frames(f.nframes)[:, 0]  # Gets first channel of the audio file
sampling_rate = f.samplerate

f.close()

# data I/O
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print 'data has %d characters, %d unique.' % (data_size, vocab_size)
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

# hyperparameters
hidden_size = 100  # size of hidden layer of neurons
seq_length = 25  # number of steps to unroll the RNN for
learning_rate = 1e-1

# model parameters
Wxh = np.random.randn(hidden_size, vocab_size) * 0.01  # input to hidden
Whh = np.random.randn(hidden_size, hidden_size) * 0.01  # hidden to hidden
Why = np.random.randn(vocab_size, hidden_size) * 0.01  # hidden to output
bh = np.zeros((hidden_size, 1))  # hidden bias