Code Example #1
File: segment_corpus.py Project: DReiser7/w2v_did
def resample_and_segment_audio(dir_path, seconds):
    pathlist = Path(dir_path).glob('**/*.mp3')

    for path in pathlist:
        absolute_path = str(path)
        print('preprocessing ' + absolute_path + ' ...')

        # do resampling on root path
        speech_array, sampling_rate = torchaudio.load(str(path))
        speech_resampled = librosa.resample(
            np.asarray(speech_array[0].numpy()), sampling_rate, 16_000)

        segmented_path = str(path.parent).replace(
            'test', 'test-segmented-' + str(seconds))

        # segment the resampled audio manually; sf.blocks expects a file
        # or file-like object, not an in-memory array
        block_size = seconds * 16_000
        hop = block_size - 16_000  # keep the original one-second overlap
        Path(segmented_path).mkdir(parents=True, exist_ok=True)

        counter = 0
        for start in range(0, len(speech_resampled), hop):
            block = speech_resampled[start:start + block_size]
            if len(block) < block_size:  # zero-fill the final block
                block = np.pad(block, (0, block_size - len(block)))

            new_file_path = segmented_path + '/' + str(
                path.name) + '_' + str(counter) + '.mp3'

            # torchaudio.save expects a (channels, frames) tensor (requires torch)
            torchaudio.save(new_file_path,
                            torch.from_numpy(block).unsqueeze(0),
                            16_000, format='mp3')
            counter += 1
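A minimal invocation sketch for the function above ('data/test' is an assumed directory whose path contains 'test', matching the replace() call in the function):

# hypothetical usage: cut every mp3 under data/test into
# 10-second segments resampled to 16 kHz
resample_and_segment_audio('data/test', 10)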
Code Example #2
    def deNoise(self, outputFile):
        """
        De-noising function that reads the audio signal in chunks and processes
        and writes to the output file efficiently.

        VISU Shrink is used to generate the noise threshold

        Parameters
        ----------
        outputFile : str
            de-noised file name

        """
        info = soundfile.info(self.__inputFile)  # getting info of the audio
        rate = info.samplerate

        with soundfile.SoundFile(outputFile, "w", samplerate=rate, channels=info.channels) as of:
            for block in tqdm(soundfile.blocks(self.__inputFile, int(rate * info.duration * 0.10))):
                coefficients = pywt.wavedec(block, 'db4', mode='per', level=2)

                #  getting variance of the input signal
                sigma = mad(coefficients[- 1])

                # VISU Shrink thresholding by applying the universal threshold proposed by Donoho and Johnstone
                thresh = sigma * np.sqrt(2 * np.log(len(block)))

                # thresholding using the noise threshold generated
                coefficients[1:] = (pywt.threshold(i, value=thresh, mode='soft') for i in coefficients[1:])

                # getting the clean signal as in original form and writing to the file
                clean = pywt.waverec(coefficients, 'db4', mode='per')
                of.write(clean)
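A self-contained sketch of the same VisuShrink step on a synthetic signal; the mad() helper above is replaced by an explicit median-absolute-deviation estimate (the 0.6745 factor makes it a consistent sigma estimate for Gaussian noise):

import numpy as np
import pywt

rng = np.random.default_rng(0)
block = np.sin(np.linspace(0, 8 * np.pi, 4096)) + 0.1 * rng.standard_normal(4096)

coefficients = pywt.wavedec(block, 'db4', mode='per', level=2)
detail = coefficients[-1]
sigma = np.median(np.abs(detail - np.median(detail))) / 0.6745
thresh = sigma * np.sqrt(2 * np.log(len(block)))  # universal threshold
coefficients[1:] = [pywt.threshold(c, value=thresh, mode='soft')
                    for c in coefficients[1:]]
clean = pywt.waverec(coefficients, 'db4', mode='per')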
Code Example #3
def waveman(fn, config=None):
  if config is None:
    with open("config.json", "r") as f:
      CONFIG = json.load(f)
  else:
    CONFIG = config
  CONFIG['width'] = CONFIG['steps'] * CONFIG['step_width']
  log("Initializing new drawing context", file=fn)  
  canvas = svgwrite.Drawing(profile='tiny', viewBox=f"0 0 {CONFIG['width']} {CONFIG['height']}", preserveAspectRatio=CONFIG["preserveAspectRatio"])
  sf = soundfile.SoundFile(fn)
  total_samples = len(sf)
  block_length = int(total_samples // CONFIG['steps'])
  f = open(fn, "rb")
  block_iterator = soundfile.blocks(f, blocksize=block_length)
  chunks = []
  for i, block in enumerate(block_iterator):
    mono_block = list(map(lambda sample: (sample[0] + sample[1]) / 2, block))
    chunks.append(transformer(mono_block, CONFIG['mode']))
    log("Reduced frame to sample", i=i)
  chunks = normalize(chunks)
  log("Tranformed frames into chunks")
  for i, chunk in enumerate(chunks):
    canvas.add(artist(canvas, chunk, i, CONFIG['step_width'], CONFIG['height'], CONFIG['gap'], CONFIG['align'], CONFIG['rounded'], "#abcdef"))
  log("Created SVG rectangles for all data chunks")
  return canvas
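A usage sketch, assuming a local 'track.wav' and a config.json that supplies steps, step_width, height, gap, align, rounded, mode and preserveAspectRatio:

canvas = waveman('track.wav')   # hypothetical input file
canvas.saveas('track.svg')      # svgwrite.Drawing can write itself to disk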
Code Example #4
    def load_file(self, filepath, blocksize=1024, overlap=512):
        items = []
        if self._debug:
            print("File Processing", end="", flush=True)
        sr = sf.info(filepath).samplerate
        if sr != self._sr:
            blocksize = int(sr / (self._sr/blocksize))
            if overlap > 0:
                overlap = int(sr / (self._sr/overlap))
        blockgen = sf.blocks(filepath,
                             blocksize=blocksize,
                             overlap=overlap,
                             always_2d=True,
                             fill_value=0.0)
        for bl in blockgen:
            if not np.any(bl):
                continue
            if self._debug:
                print(".", end="", flush=True)
            y = bl.transpose()
            y = librosa.resample(y, sr, self._sr)
            # Lowpass
            y = lowpass(y)
            y = y[:int(blocksize)]
            y = y[np.newaxis, :]
            items.append(y)

        if self._debug:
            print("Done")

        return np.vstack(items)
Code Example #5
    def convert(self):
        """
        Performs the fft for each time step and transforms the result
            into midi compatible data. This data is then passed to a
            midi file writer.
        """

        logging.info(str(self.info))
        logging.info("window: {} ms".format(self.time_window))
        logging.info("frequencies: min = {} Hz, max = {} Hz".format(
            self.min_freq, self.max_freq))

        with midi_writer.MidiWriter(
                outfile=self.outfile,
                channels=self.info.channels,
                time_window=self.time_window,
                bpm=self.bpm,
                condense=self.condense,
                condense_max=self.condense_max,
                max_note_length=self.max_note_length,
        ) as writer:
            for block in soundfile.blocks(
                    self.infile,
                    blocksize=self.block_size,
                    always_2d=True,
            ):
                if len(block) != self.block_size:
                    filler = numpy.array([
                        numpy.array([0.0 for _ in range(self.info.channels)])
                        for _ in range(self.block_size - len(block))
                    ])
                    block = numpy.append(block, filler, axis=0)
                notes = self._block_to_notes(block)
                writer.add_notes(notes)
                self._increment_progress()
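As an aside, the manual zero-filling of the final block above can also be delegated to soundfile itself; a sketch with an assumed 'input.wav':

import soundfile

# fill_value=0 pads the last short block with silence, so every
# block has exactly blocksize frames without a manual numpy filler
for block in soundfile.blocks('input.wav', blocksize=4096,
                              always_2d=True, fill_value=0):
    assert len(block) == 4096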
Code Example #6
def load_file_blockwise(filename, blocksize=1024, overlap=512, debug=False):
    items = []
    if debug:
        print("File Processing", end="", flush=True)
    blockgen = sf.blocks(audio_dir + filename, 
                         blocksize=blocksize, 
                         overlap=overlap, 
                         always_2d=True,
                         fill_value=0.0)
    sr = sf.info(audio_dir + filename).samplerate
    for bl in blockgen:
        if not np.any(bl):
            continue
        if debug:
            print(".", end="", flush=True)
        y = bl.transpose()
        y = librosa.resample(y, sr, SR)
        y = y[:int(blocksize)]
        y = y[np.newaxis, :]
        items.append(y)
        
    if debug:
        print("Done")

    return items
Code Example #7
def load_audio_blockwise(data, blocksize=1024, overlap=512, debug=False):
    start_time = time.time()
    items = []
    target = []
    h_target = []
    for i, sample in data.iterrows():
        if debug:
            print("File Processing", end="", flush=True)
        blockgen = sf.blocks(audio_dir + sample['filename'], 
                             blocksize=blocksize, 
                             overlap=overlap, 
                             always_2d=True,
                             fill_value=0.0)
        sr = sf.info(audio_dir + sample['filename']).samplerate
        for bl in blockgen:
            if not np.any(bl):
                continue
            if debug:
                print(".", end="", flush=True)
            y = bl.transpose()
            y = librosa.resample(y, sr, SR)
            y = y[:int(blocksize)]
            y = y[np.newaxis, :]
            items.append(y)
            h_target.append(sample.h_target)
            target.append(sample.target)
        if debug:
            print("Done")
    if debug:
        print("\tProcessing Time: " + str(time.time() - start_time))
    return np.vstack(items), np.array(h_target), np.array(target)
Code Example #8
File: main_neat.py Project: rgd-ul-2020/public
def main():
    population = Population(150)

    try:
        blocks = sf.blocks('inputs/hello_world.wav', blocksize=512, overlap=32)
        generations = tqdm(range(1))
        for generation in generations:
            block = next(blocks)
            block = np.pad(block, (0, 512 - len(block)), 'constant')

            fitness = population.evaluate(block)
            population.reproduce(generation)
            population.speciate(generation)

            generations.set_postfix({
                'G': '{:.4e}'.format(fitness[0]),
                'D': '{:.4e}'.format(fitness[1])
            })
    except AssertionError:
        pass
    print(population)
    print(population.enc_best_gen, population.enc_best_fit)
    print(population.dec_best_gen, population.dec_best_fit)
    print(population.crit_best_gen, population.crit_best_fit)
    print()
Code Example #9
def transcribe_file(speech_filepath, output_filepath, keyword_file,
                    start_timestamp, length):
    """Transcribe the given audio file. Length is in seconds."""
    from google.cloud import speech
    speech_client = speech.Client()
    """Read keyword_file, if any."""
    keywords = []  #empty list
    if os.path.exists(keyword_file):
        with open(keyword_file, 'r') as f:
            keywords_raw = f.readlines()
            for i in range(len(keywords_raw)):
                keywords.append(keywords_raw[i].lower().strip())

    #MULTILINGUAL SUPPORT DISABLED FOR NOW
    language = 'en-US'
    '''
	#set language
	language = ''
	if lang == '-e':
		language = 'en-US'
	else:
		language = 'ja-JP'
	'''
    """create and open text file to save transcription"""
    save_file = open(output_filepath, "w+", encoding="utf-8")
    text = ''
    """Slice audio into # of blocks, then send to google cloud for analysis"""
    sample_rate = 44100
    stop = -1
    if length > 0:
        stop = start_timestamp + sample_rate * length
    print("start and stop is ", start_timestamp, stop)

    count = 0
    for audio in sf.blocks(speech_filepath, start=start_timestamp, stop=stop, \
     blocksize=PAYLOAD_LIMIT, overlap=OVERLAP):
        sf.SoundFile('buffer.wav', 'w', sample_rate, 1,
                     'PCM_16').write(audio.sum(axis=1) / float(2))
        content = io.open('buffer.wav', 'rb').read()
        audio_sample = speech_client.sample(content=content,
                                            source_uri=None,
                                            encoding='LINEAR16',
                                            sample_rate=sample_rate)
        print("evaluating block ", count)
        count += 1

        try:
            alternatives = audio_sample.sync_recognize(language_code=language,
                                                       speech_context=keywords)
            for alternative in alternatives:
                text += alternative.transcript + ' '
        except ValueError:
            continue
    """final save"""
    #add stop timestep on top
    text = str(stop) + "\n" + text
    save_file.write(text)  # file opened in text mode; no manual encode needed
    save_file.close()
    return text
Code Example #10
    def get_blocks(self, block_size):
        # self.blocks_count = (sound_file.frames - 1) // (block_size // 2) + 1
        blocks = sf.blocks(self._filename, block_size)

        blocks = list(map(one_channel, blocks))
        blocks_count = len(blocks)

        return IterableWithLength(blocks, blocks_count)
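The commented-out count on the first line suggests computing the block count from the file header instead of materializing the generator; a sketch for the non-overlapping case used here ('audio.wav' is an assumed path):

import soundfile as sf

info = sf.info('audio.wav')
block_size = 1024
blocks_count = (info.frames + block_size - 1) // block_size  # ceiling division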
Code Example #11
File: soundblocks.py Project: tria-git/beatdet
def getBlocksSF(audiosrc):
    sbl = sf.blocks(audiosrc, blocksize=22050)
    rate = sf.info(audiosrc).samplerate
    tgram = []
    for bl in sbl:
        y = np.mean(bl, axis=1)
        tgram.append(tempft(y, sr=rate))
    # sbl is exhausted by the loop above; re-open with sf.blocks()
    # if the caller needs to iterate over the audio again
    return sbl, tgram, rate
Code Example #12
File: test_pysoundfile.py Project: tns-yuq/SoundFile
def test_blocks_inplace_modification(file_stereo_r):
    out = np.empty((3, 2))
    blocks = []
    for block in sf.blocks(file_stereo_r, out=out, overlap=1):
        blocks.append(np.copy(block))
        block *= 2

    expected_blocks = [data_stereo[0:3], data_stereo[2:5]]
    assert_equal_list_of_arrays(blocks, expected_blocks)
Code Example #13
File: capture.py Project: cjbayron/rg_speech_to_text
 def __init__(self, audio_path, blocksize, sr=16000, overlap=0, padding=None, dtype="float32"):
     assert blocksize > 0, "blocksize cannot be 0 or negative"
     self._sr = sr
     self._orig_sr = sf.info(audio_path).samplerate
     self._sf_blocks = sf.blocks(audio_path,
                     blocksize=blocksize, 
                     overlap=overlap,
                     fill_value=padding,
                     dtype=dtype)
Code Example #14
 def blocks(self):
     frames = [
         frame for frame in sf.blocks(self.speechPath, self.blocksize)
     ]
     if np.shape(frames[-1:])[1] < 4 * self.samplerate:
         temp = np.concatenate(frames[-2:])
         frames = frames[:len(frames) - 2]
         frames.append(temp)
     return frames
Code Example #15
 def generator(self):
     sr = sf.info(self._audio_path).samplerate
     sf_blocks = sf.blocks(self._audio_path,
                           blocksize=self._blocksize,
                           overlap=self._overlap,
                           fill_value=self._padding,
                           dtype=self._dtype)
     for block in sf_blocks:
         yield self.__resample_file(block, sr, self._sr)
Code Example #16
File: main_tcn.py Project: rgd-ul-2020/public
def next_file(flist, blocksize=1024, dur=3):
    fname = random.choice(flist)
    info = sf.info(fname)
    segsize = dur * info.samplerate
    start = random.randint(0, max(0, info.frames - segsize))
    return sf.blocks(fname,
                     blocksize=blocksize,
                     start=start,
                     stop=start + segsize,
                     overlap=0)
Code Example #17
def signal_enhance(speechFile, destFile, vadFile):
    dest_dir = '/'.join(destFile.split('/')[:-1])
    os.makedirs(dest_dir, exist_ok=True)
    vad_dir = '/'.join(vadFile.split('/')[:-1])
    os.makedirs(vad_dir, exist_ok=True)

    samplerate = sf.info(speechFile).samplerate

    x_wpe = []
    for block in sf.blocks(speechFile, 5 * samplerate):
        x_wpe.append(wpe.wpe_dereverb(block, samplerate))
    return x_wpe
Code Example #18
def DelayedFileAudioSource(path, blocksize=512):
    """
    Simulates real-time decoding with a time.sleep corresponding to the
    duration of the audio packet sent at each yield (assumes 8kHz)

    :path: Path to the audio input (any format supported by soundfile package)
    :blocksize: Size of the blocks of audio which will be sent (in samples)
    """
    for block in sf.blocks(path, blocksize):
        # Soundfile converts to 64-bit float ndarray. We convert back to bytes
        bytestr = (block * 2**15).astype('<i2').tobytes()  # avoid shadowing builtin bytes
        time.sleep(blocksize / 8000. / 2.)  # 8kHz
        yield bytestr
Code Example #19
File: bigcnn.py Project: isenilov/WetRoad
def generator(w, d, batch_size=32, N=N):
    i = 0
    while 1:
        data = []
        labels = []
        wet = sf.blocks(w, blocksize=N, overlap=N//2, start=i)
        dry = sf.blocks(d, blocksize=N, overlap=N//2, start=i)
        for n in range(batch_size):
            data.append(next(wet))
            labels.append(1)
            data.append(next(dry))
            labels.append(0)
            i += N
        data = np.array(data)
        # data = data[:, :, 0] #  for stereo data
        data = np.expand_dims(data, axis=1)
        data = data.reshape((data.shape[0], 1, data.shape[2]))
        data = np.expand_dims(data, axis=3)
        # print("\n{:,}".format(i))
        yield data, np.array(to_categorical(labels))
        if i + batch_size * N > 1000000000:
            i = 0
Code Example #20
def load_audio_blocks(filename, frame_length, block_length, hop_length):

    # Separate audio file into overlapping blocks
    blocks = sf.blocks(filename,
                       blocksize=frame_length +
                       (block_length - 1) * hop_length,
                       overlap=frame_length - hop_length,
                       fill_value=None,
                       start=0,
                       frames=-1,
                       dtype=np.float32,
                       always_2d=False)
    return blocks
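To make the blocksize/overlap arithmetic concrete (illustrative numbers, not from the source): each block spans frame_length + (block_length - 1) * hop_length samples, and consecutive blocks share frame_length - hop_length samples, so no analysis frame straddles a block boundary:

frame_length, block_length, hop_length = 2048, 16, 512
blocksize = frame_length + (block_length - 1) * hop_length  # 9728 samples
overlap = frame_length - hop_length                         # 1536 samples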
Code Example #21
    def transform(self, path):
        blocks = {}
        count = 0
        for block in sf.blocks(path,
                               blocksize=160000,
                               overlap=16000,
                               fill_value=0):
            blocks[count] = block
            count += 1

        # np.array over (index, block) tuples would build an object array
        # that torch cannot convert; stack the fixed-size blocks instead
        array = np.stack(list(blocks.values()))

        return torch.from_numpy(array)
Code Example #22
    def play(self):
        for block in soundfile.blocks(self.file, blocksize=self.framesize):

            if not self.start_time:
                self.start_time = rospy.Time.now()
            end_time = self.start_time + self.frametime
            timestamps = RecordingTimeStamps()
            timestamps.start = self.start_time
            timestamps.finish = end_time

            rospy.sleep(end_time - rospy.Time.now())
            self.esiaf_handler.publish(self.topic, block,
                                       msg_to_string(timestamps))
            self.start_time = end_time
Code Example #23
def getAttacks(tempo,partitions,filename):
	data,samplerate = sf.read(filename)
	
	windowsize = 60 * samplerate / (tempo * partitions)

	rms = [np.sqrt(np.mean(block**2)) for block in sf.blocks(filename, blocksize=int(windowsize), overlap=int(windowsize / 2))]
	x = np.linspace(0, len(rms)-1, len(rms))
	
#	print(rms)

	rms = mf.filter(rms)
#	pl.show(pl.plot(x,rms))
	
	att,rls = attacks(rms)
	return att,rls
Code Example #24
def decode(modFile):
    # save message for return val
    message = []

    # load wav data and stats
    data, sampleRate = sf.read(modFile, dtype=WAV_DATATYPE)
    frameSize = sampleRate // BAUD_RATE
    print(f"\nStats: sample rate {sampleRate}, frame size {frameSize}")

    # create decoder object
    decoder = Demodulator(sampleRate, frameSize)

    # init loop vars
    start = False
    cntBits = 0
    bits = []

    # start of message
    print("\n\"", end=" ")

    # mimic real-time, read blocks from file like it's a buffer
    for frame in sf.blocks(modFile, blocksize = frameSize):

        # wait for start bit
        if start == False and decoder.decode(frame) == SPACE:
            start = True

        # read the byte 
        elif start == True and cntBits < 8:
            bits.append(decoder.decode(frame))
            cntBits += 1

        elif start == True and cntBits == 8:
            # verify next bit is end bit
            assert decoder.decode(frame) == MARK, "Stop bit not detected"
            letter = convertBits(bits)
            print(letter, end=" ")
            message.append(letter)
            bits = []
            cntBits = 0
            start = False
       
    # end of message 
    print(" \"\n")

    return message
Code Example #25
    def run_long_demo(self, wav, feature_converter, cut_time):
        """
                wav: wav file path 
                feature_converter: to processs feature, class filterbank
                cut_time: cut long wac into equal length wav file, block size
                """

        print(
            'Start Decoding, decoding in blocks, block size is approximately 6 seconds'
        )
        self.long_wav = ''
        #self.block_size = cut_time*16320
        self.block_size = cut_time * 15840
        wav = sf.blocks(wav,
                        blocksize=self.block_size,
                        overlap=0,
                        dtype='float32')
        result_index = []
        result_index_list = [[]]
        preprocess_time = 0
        time_info = {
            'lstmrun': 0,
            'convert': 0,
            'read': 0,
            'preprocess_time': 0
        }
        start = time.time()
        for part in wav:
            part = self.preprocess_part(part)
            part_length = torch.tensor([int(part.shape[1])])
            p_s = time.time()
            input = feature_converter([part, part_length])
            preprocess_time += time.time() - p_s
            index, time_info_dict = self._run_short(input)
            result_index_list = self.concate_2list(result_index_list, index)
            for k, v in time_info_dict.items():
                time_info[k] += v
        time_info['preprocess_time'] = preprocess_time
        end = time.time()
        self.decoding_time = end - start

        print()
        self.print_summary()
        print('dpu time:', time_info['lstmrun'])
        # Fix me , result_index is batch 1 ,need to support multi batch index
        return result_index_list
Code Example #26
def iter_corpus(only_label=None, as_binary=None, except_label=None):
    """
    Iterates over data, returning tuples of the form (label, [feature0, feature1, ...featureN])
    """
    for audio_fn, speaker_label in speakers_data.items():
        # print(audio_fn, speaker_label)

        if only_label is not None and speaker_label != only_label:
            continue

        if except_label is not None and speaker_label == except_label:
            continue

        #https://stackoverflow.com/a/44800492/247542
        #https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.io.wavfile.read.html
        # sample_rate, samples = wavfile.read(os.path.join(DATA_DIR, audio_fn))
        first_len = None
        for sample in sf.blocks(os.path.join(DATA_DIR, audio_fn),
                                blocksize=CHUNK_SIZE):

            # print('sample_rate:', sample_rate) # samples/second
            # print('sample.shape:', sample.shape)
            f, t, Sxx = signal.spectrogram(sample, RATE)

            # Limit frequencies to the human voice range.
            # fmin = 50 # Hz
            # fmax = 300 # Hz
            # freq_slice = np.where((f >= fmin) & (f <= fmax))
            # f = f[freq_slice]
            # Sxx = Sxx[freq_slice,:][0]
            f, Sxx = only_voice_range(f, Sxx)

            Sxx = Sxx.flatten()
            if first_len is None:
                first_len = len(Sxx)
            elif len(Sxx) != first_len:
                continue
            # print('Sxx.shape:', Sxx.shape, len(Sxx))
            # assert Sxx.shape == (258,)
            assert Sxx.shape == (8, )

            if as_binary is not None:
                speaker_label = speaker_label == as_binary

            yield Sxx, speaker_label  # X, y
Code Example #27
def process(s, alpha):
    print("Analyzing the file " + s)
    # block processing
    env = np.concatenate(
        [filt(block) for block in sf.blocks(wd + '/' + s, blocksize=fs * 10)])
    #pool = Pool(processes = 5)
    #result = pool.map(filt, blocks)
    #env = sum(result)
    peaks = peak_find(env)
    # add file name as a separate column
    result = np.hstack((peaks, np.array([[s]] * len(peaks))))
    # convert into dataframe
    df_res = pd.DataFrame({
        'Time': result[:, 0],
        'Amp': result[:, 1],
        'File': result[:, 2]
    })
    return df_res
Code Example #28
def FileAudioSource(path, chunk_size=4096):
    """
    Simple audio file reader. Should be compatible with all files supported
    by 'soundfile' package.

    chunk_size is in samples, so the size in bytes of the sent packet is
    2*chunk_size, since we are sending 16-bit signed PCM samples. chunk_size*2
    should be smaller than the predefined maximum payload from the configured
    websocket connection.

    :path: Path to the audio input (any format supported by soundfile package)
    :chunk_size: Size of the blocks of audio which will be sent (in samples)

    :yields: bytestrings of size <chunk_size> * 2

    Terminates when the audio file provided has no more content
    """
    for block in sf.blocks(path, chunk_size):
        # Soundfile converts to 64-bit float ndarray. We convert back to bytes
        bytestr = (block * 2**15).astype('<i2').tobytes()
        yield bytestr
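A consumer sketch checking the 2 * chunk_size byte size promised by the docstring ('speech.wav' is an assumed mono input):

total = 0
for packet in FileAudioSource('speech.wav', chunk_size=4096):
    # 16-bit signed PCM: two bytes per sample, so full packets are
    # 8192 bytes (only the final packet may be shorter)
    total += len(packet)
print(total, 'bytes streamed')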
Code Example #29
File: soundblocks.py Project: tria-git/beatdet
def getBeatsBl(audiosrc):
    y, sr = librosa.load(audiosrc)
    bl = range(0,len(y),10*sr)
    spl = list(zip(bl[:-1],bl[1:]))
    tempo = []
    beats=[]
    ts = []
    for start, end in spl:
        onset_env = librosa.onset.onset_strength(y=y[start:end], sr=sr, aggregate=np.median)
        temp, beat = librosa.beat.beat_track(y=y[start:end], sr=sr, onset_envelope=onset_env)
#       tst = librosa.frames_to_time(beat, sr=sr)
        beats.append(beat)
        tempo.append(temp)
        #import pdb;pdb.set_trace()
        tst = librosa.frames_to_time(beats, sr=sr)
        ts.append(tst)
#    import pdb;pdb.set_trace()
    return ts
Code Example #30
def test_blocks_fill_last_block(file_stereo_r):
    blocks = list(sf.blocks(file_stereo_r, blocksize=3, fill_value=0))
    last_block = np.row_stack((data_stereo[3:4], np.zeros((2, 2))))
    assert_equal_list_of_arrays(blocks, [data_stereo[0:3], last_block])
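This test, like the others here, relies on a data_stereo fixture defined elsewhere in the suite; a self-contained reproduction might look like the following (the four-frame stereo values are an assumption, only the shapes matter):

import numpy as np
import soundfile as sf

data_stereo = np.array([[1.0, -1.0],
                        [0.75, -0.75],
                        [0.5, -0.5],
                        [0.25, -0.25]])
sf.write('stereo_fixture.wav', data_stereo, 44100, subtype='FLOAT')

# blocksize=3 with fill_value=0 zero-pads the final one-frame block
blocks = list(sf.blocks('stereo_fixture.wav', blocksize=3, fill_value=0))
assert blocks[0].shape == (3, 2) and blocks[1].shape == (3, 2)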
Code Example #31
def test_blocks_partial_last_block(file_stereo_r):
    blocks = list(sf.blocks(file_stereo_r, blocksize=3))
    assert_equal_list_of_arrays(blocks, [data_stereo[0:3], data_stereo[3:4]])
Code Example #32
def test_blocks_full_last_block(file_stereo_r):
    blocks = list(sf.blocks(file_stereo_r, blocksize=2))
    assert_equal_list_of_arrays(blocks, [data_stereo[0:2], data_stereo[2:4]])
Code Example #33
def test_blocks_without_blocksize():
    with pytest.raises(TypeError):
        list(sf.blocks(filename_stereo))
Code Example #34
File: audio.py Project: ai-learn-use/librosa
def stream(path, block_length, frame_length, hop_length,
           mono=True, offset=0.0, duration=None, fill_value=None,
           dtype=np.float32):
    '''Stream audio in fixed-length buffers.

    This is primarily useful for processing large files that won't
    fit entirely in memory at once.

    Instead of loading the entire audio signal into memory (as
    in `load()`), this function produces *blocks* of audio spanning
    a fixed number of frames at a specified frame length and hop
    length.

    While this function strives for similar behavior to `load`,
    there are a few caveats that users should be aware of:

        1. This function does not return audio buffers directly.
           It returns a generator, which you can iterate over
           to produce blocks of audio.  A *block*, in this context,
           refers to a buffer of audio which spans a given number of
           (potentially overlapping) frames.
        2. Automatic sample-rate conversion is not supported.
           Audio will be streamed in its native sample rate,
           so no default values are provided for `frame_length`
           and `hop_length`.  It is recommended that you first
           get the sampling rate for the file in question, using
           `get_samplerate()`, and set these parameters accordingly.
        3. Many analyses require access to the entire signal
           to behave correctly, such as `resample`, `cqt`, or
           `beat_track`, so these methods will not be appropriate
           for streamed data.
        4. The `block_length` parameter specifies how many frames
           of audio will be produced per block.  Larger values will
           consume more memory, but will be more efficient to process
           down-stream.  The best value will ultimately depend on your
           application and other system constraints.
        5. By default, most librosa analyses (e.g., short-time Fourier
           transform) assume centered frames, which requires padding the
           signal at the beginning and end.  This will not work correctly
           when the signal is carved into blocks, because it would introduce
           padding in the middle of the signal.  To disable this feature,
           use `center=False` in all frame-based analyses.

    See the examples below for proper usage of this function.


    Parameters
    ----------
    path : string, int, or file-like object
        path to the input file to stream.

        Any codec supported by `soundfile` is permitted here.

    block_length : int > 0
        The number of frames to include in each block.

        Note that at the end of the file, there may not be enough
        data to fill an entire block, resulting in a shorter block
        by default.  To pad the signal out so that blocks are always
        full length, set `fill_value` (see below).

    frame_length : int > 0
        The number of samples per frame.

    hop_length : int > 0
        The number of samples to advance between frames.

        Note that when `hop_length < frame_length`, neighboring frames
        will overlap.  Similarly, the last frame of one *block* will overlap
        with the first frame of the next *block*.

    mono : bool
        Convert the signal to mono during streaming

    offset : float
        Start reading after this time (in seconds)

    duration : float
        Only load up to this much audio (in seconds)

    fill_value : float [optional]
        If padding the signal to produce constant-length blocks,
        this value will be used at the end of the signal.

        In most cases, `fill_value=0` (silence) is expected, but
        you may specify any value here.

    dtype : numeric type
        data type of audio buffers to be produced

    Yields
    ------
    y : np.ndarray
        An audio buffer of (at most)
        `(block_length - 1) * hop_length + frame_length` samples.

    See Also
    --------
    load
    get_samplerate
    soundfile.blocks

    Examples
    --------
    Apply a short-term Fourier transform to blocks of 256 frames
    at a time.  Note that streaming operation requires left-aligned
    frames, so we must set `center=False` to avoid padding artifacts.

    >>> filename = librosa.util.example_audio_file()
    >>> sr = librosa.get_samplerate(filename)
    >>> stream librosa.stream(filename,
    ...                       block_length=256,
    ...                       frame_length=4096,
    ...                       hop_length=1024)
    >>> for y_block in stream:
    ...     D_block = librosa.stft(y_block, center=False)

    Or compute a mel spectrogram over a stream, using a shorter frame
    and non-overlapping windows

    >>> filename = librosa.util.example_audio_file()
    >>> sr = librosa.get_samplerate(filename)
    >>> stream = librosa.stream(filename,
    ...                         block_length=256,
    ...                         frame_length=2048,
    ...                         hop_length=2048)
    >>> for y_block in stream:
    ...     m_block = librosa.feature.melspectrogram(y_block, sr=sr,
    ...                                              n_fft=2048,
    ...                                              hop_length=2048,
    ...                                              center=False)

    '''

    if not (np.issubdtype(type(block_length), np.integer) and block_length > 0):
        raise ParameterError('block_length={} must be a positive '
                             'integer'.format(block_length))
    if not (np.issubdtype(type(frame_length), np.integer) and frame_length > 0):
        raise ParameterError('frame_length={} must be a positive '
                             'integer'.format(frame_length))
    if not (np.issubdtype(type(hop_length), np.integer) and hop_length > 0):
        raise ParameterError('hop_length={} must be a positive '
                             'integer'.format(hop_length))

    # Get the sample rate from the file info
    sr = sf.info(path).samplerate

    # Construct the stream
    if offset:
        start = int(offset * sr)
    else:
        start = 0

    if duration:
        frames = int(duration * sr)
    else:
        frames = -1

    blocks = sf.blocks(path,
                       blocksize=frame_length + (block_length - 1) * hop_length,
                       overlap=frame_length - hop_length,
                       fill_value=fill_value,
                       start=start,
                       frames=frames,
                       dtype=dtype,
                       always_2d=False)

    for block in blocks:
        if mono:
            yield to_mono(block.T)
        else:
            yield block.T
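A further usage sketch (not from the librosa documentation): block-wise RMS over a stream, again with center=False so frames stay left-aligned ('audio.wav' is an assumed path):

import librosa

sr = librosa.get_samplerate('audio.wav')
stream = librosa.stream('audio.wav', block_length=128,
                        frame_length=2048, hop_length=512)
for y_block in stream:
    rms_block = librosa.feature.rms(y=y_block, frame_length=2048,
                                    hop_length=512, center=False)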
Code Example #35
def test_blocks_with_overlap(file_stereo_r):
    blocks = list(sf.blocks(file_stereo_r, blocksize=3, overlap=2))
    assert_equal_list_of_arrays(blocks, [data_stereo[0:3], data_stereo[1:4]])
Code Example #36
def test_blocks_with_start(file_stereo_r):
    blocks = list(sf.blocks(file_stereo_r, blocksize=2, start=2))
    assert_equal_list_of_arrays(blocks, [data_stereo[2:4]])
Code Example #37
def test_blocks_mono():
    blocks = list(sf.blocks(filename_mono, blocksize=3, dtype='int16',
                            fill_value=0))
    assert_equal_list_of_arrays(blocks, [[0, 1, 2], [-2, -1, 0]])
Code Example #38
    def detect_silent(self,
                      block_size=1.0,
                      slide_size=0.1,
                      threshold=0.2,
                      col=0):
        '''
        All arguments are in seconds.
        block_size : block length of 1 second (silence lasting at least 1 second is removed)
        slide_size : allow a 0.1-second gap (the slide interval between blocks)
        threshold : treat samples whose absolute value is 0.2 or below as silence
        '''

        blocksize_ms = int(block_size * self.samplerate)
        slide_ms = int(slide_size * self.samplerate)
        overlap_ms = blocksize_ms - slide_ms

        print(blocksize_ms)
        print(slide_ms)
        print(overlap_ms)

        # build the per-block silence list
        silent_judge = []  # 0 = silent, 1 = voiced
        i = 0
        for block in sf.blocks(self.fname,
                               blocksize=blocksize_ms,
                               overlap=overlap_ms):

            if len(block.shape) == 1:
                block = abs(block)
                if len(block) != blocksize_ms:
                    length_last_block = len(block)
                    print("last")
                    print(length_last_block)
                else:
                    length_last_block = 0
            else:
                block = abs(block[:, col])
                if len(block) != blocksize_ms:
                    length_last_block = len(block)
                    print("last")
                    print(length_last_block)
                else:
                    length_last_block = 0

            # 1 where |sample| exceeds the threshold, else 0
            binary_arr = np.where(block > threshold, 1, 0)

            if np.sum(binary_arr) == 0:
                silent_judge.append(0)
            else:
                silent_judge.append(1)

            i += 1

        # expand silent_judge into a sample-level mask matching the signal length
        final_silent_judge = []

        for i in range(len(silent_judge)):

            val = silent_judge[i]

            if val == 0:
                if i == len(silent_judge) - 1:
                    final_silent_judge.append([0] * length_last_block)
                else:
                    final_silent_judge.append([0] * slide_ms)
            elif val == 1:
                if i == len(silent_judge) - 1:
                    final_silent_judge.append([1] * length_last_block)
                else:
                    final_silent_judge.append([1] * slide_ms)

#         silent_judge = []
#         for smll in new_listing:
#             for val in smll:
#                 silent_judge.append(val)

#         silent_judge_final = np.ones(self.length_ms)
#         silent_judge_final[:len(silent_judge)] = silent_judge

        final_silent_judge_flatten = []
        for blk in final_silent_judge:
            for num in blk:
                final_silent_judge_flatten.append(num)

        self.silent_judge_final = np.array(final_silent_judge_flatten)
        self.silent_mask = np.array(final_silent_judge_flatten) == 1
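The same sliding-block silence test can be sketched standalone, outside the class ('speech.wav' and the 0.2 threshold are assumptions):

import numpy as np
import soundfile as sf

fname = 'speech.wav'
sr = sf.info(fname).samplerate
block = int(1.0 * sr)   # one-second blocks
slide = int(0.1 * sr)   # 0.1-second slide between block starts
judged = []             # 0 = silent block, 1 = voiced block
for bl in sf.blocks(fname, blocksize=block, overlap=block - slide):
    mono = bl if bl.ndim == 1 else bl[:, 0]  # first channel
    judged.append(int(np.any(np.abs(mono) > 0.2)))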
Code Example #39
def test_blocks_with_frames_and_fill_value(file_stereo_r):
    blocks = list(
        sf.blocks(file_stereo_r, blocksize=2, frames=3, fill_value=0))
    last_block = np.row_stack((data_stereo[2:3], np.zeros((1, 2))))
    assert_equal_list_of_arrays(blocks, [data_stereo[0:2], last_block])
Code Example #40
def test_blocks_with_stop_smaller_than_start(file_stereo_r):
    blocks = list(sf.blocks(file_stereo_r, blocksize=2, start=2, stop=1))
    assert blocks == []
Code Example #41
def test_blocks_with_frames(file_stereo_r):
    blocks = list(sf.blocks(file_stereo_r, blocksize=2, frames=3))
    assert_equal_list_of_arrays(blocks, [data_stereo[0:2], data_stereo[2:3]])
Code Example #42
def test_blocks_with_negative_start_and_stop(file_stereo_r):
    blocks = list(sf.blocks(file_stereo_r, blocksize=2, start=-2, stop=-1))
    assert_equal_list_of_arrays(blocks, [data_stereo[-2:-1]])
Code Example #43
def test_blocks_with_too_large_stop(file_stereo_r):
    blocks = list(sf.blocks(file_stereo_r, blocksize=3, stop=666))
    assert_equal_list_of_arrays(blocks, [data_stereo[0:3], data_stereo[3:4]])
Code Example #44
def test_blocks_with_too_large_start(file_stereo_r):
    blocks = list(sf.blocks(file_stereo_r, blocksize=2, start=666))
    assert_equal_list_of_arrays(blocks, [[]])
Code Example #45
def test_blocks_with_stop(file_stereo_r):
    blocks = list(sf.blocks(file_stereo_r, blocksize=2, stop=2))
    assert_equal_list_of_arrays(blocks, [data_stereo[0:2]])

    with pytest.raises(TypeError):
        list(sf.blocks(filename_stereo, blocksize=2, frames=2, stop=2))
Code Example #46
def test_blocks_with_out(file_stereo_r):
    out = np.empty((3, 2))
    blocks = list(sf.blocks(file_stereo_r, out=out))
    assert blocks[0] is out
    # First frame was overwritten by second block:
    assert np.all(blocks[0] == data_stereo[[3, 1, 2]])

    assert blocks[1].base is out
    assert np.all(blocks[1] == data_stereo[[3]])

    with pytest.raises(TypeError):
        list(sf.blocks(filename_stereo, blocksize=3, out=out))