Example #1
1
 def play_speech(self, speech_file):
     """
     Open a WAV speech file and build the "queue" of chunks from it.

     The decoded audio is split with ``make_chunks`` using a chunk length
     of 1000 and stored in ``self._speaking_chunks``; afterwards
     ``self._is_speaking`` is set to ``True``.
     """
     speech_audio = AudioSegment.from_wav(speech_file)
     self._speaking_chunks = make_chunks(speech_audio, 1000)
     self._is_speaking = True
Example #2
0
def AUDIO(s, audios, intervals, times, gain=None, epsilon=.4):
	"""Cut one clip out of every MP3 in *audios* and hand it to the file writer.

	For the file at position ``idx`` the clip spans
	``1000 * intervals[idx][0]`` to ``1000 * (intervals[idx][1] + epsilon)``
	and is passed to ``s.renderer.file_writer.add_audio_segment`` together
	with ``times[idx]``.  *gain* is forwarded for every clip except the
	first one, which is added without a gain argument.
	"""
	for idx, audio in enumerate(audios):
		source = AudioSegment.from_mp3(audio)
		begin, end = intervals[idx][0], intervals[idx][1]
		clip = source[1000 * begin:1000 * (end + epsilon)]
		# Only clips after the first one receive the gain argument.
		extra_args = () if idx == 0 else (gain,)
		s.renderer.file_writer.add_audio_segment(clip, times[idx], *extra_args)
Example #3
0
def wav_split(file):
    """Split a WAV file into pieces of at most 60 seconds and register them.

    Every piece is exported next to the source file as
    ``part_sound_<i>.wav``.  For each piece a record
    ``{"start": ..., "end": ..., "lines": []}`` is stored in the
    ``DB/lines.db`` shelf under the piece's path, unless the shelf already
    holds a non-empty entry for that path.

    :param file: path of the WAV file to split
    :return: list with the paths of the exported pieces, in order
    """
    main_wav_path = file
    directory = os.path.dirname(file)
    # A bare file name has an empty dirname; blindly appending '/' would make
    # the pieces land in the filesystem root instead of the current directory.
    path = directory + '/' if directory else ''
    # Duration as reported by mediainfo, truncated to an integer; it is
    # multiplied by 1000 below to be comparable with the slice offsets.
    sound_len = int(float(mediainfo(main_wav_path)['duration']))
    sound = AudioSegment.from_wav(main_wav_path)
    part_file_list = []
    # Number of pieces: ceil(sound_len / 60), but never fewer than one.
    n = max(1, -(-sound_len // 60))
    with shelve.open('DB/lines.db') as db:
        for i in range(n):
            # Each piece starts 1 ms after the 60 s boundary.
            start_time = i * 60 * 1000 + 1
            end_time = min((i + 1) * 60 * 1000, sound_len * 1000)
            word = sound[start_time:end_time]
            part_file_name = '{}part_sound_{}.wav'.format(path, i)
            word.export(part_file_name, format='wav')
            part_file_list.append(part_file_name)
            record = {"start": start_time, "end": end_time, "lines": []}

            # Store entries that are not in the database yet.  Indexing the
            # shelf with a missing key raises KeyError, so look it up with
            # .get(), which returns None for unknown keys.
            if not db.get(part_file_name):
                db[part_file_name] = record
    return part_file_list
Example #4
0
    def prepare(media: bytes, bit_rate: int) -> Optional[ToDecode]:
        """Store *media* on disk and transcode it to a mono 16-bit PCM WAV.

        The raw bytes are written to ``<id>.pre.wav`` below
        ``Audio.get_prefix()``, converted by ffmpeg into ``<id>.wav`` and the
        result is then re-exported with a single channel.  ``Audio.epoch`` is
        incremented once per call.

        :param media: raw bytes of the uploaded media file
        :param bit_rate: value passed to ffmpeg's ``-ar`` option
        :return: ``ToDecode(wav_out, duration, corpus_id)``, or ``None`` when
            ffmpeg cannot be started or exits with a non-zero status
        """
        # Draw random ids until neither of the two target file names is taken.
        while True:
            corpus_id = codecs.encode(os.urandom(CORPUS_HASH_LEN),
                                      'hex').decode()
            wav_in = os.path.join(Audio.get_prefix(), corpus_id + ".pre.wav")
            wav_out = os.path.join(Audio.get_prefix(), corpus_id + ".wav")
            if not (os.path.exists(wav_out) or os.path.exists(wav_in)):
                break

        with open(wav_in, "wb") as in_wav:
            in_wav.write(media)

        Audio.epoch += 1
        # Pass an argument list without a shell: the paths reach ffmpeg
        # verbatim even when the prefix contains spaces or shell
        # metacharacters.
        ffmpeg_command = [
            "ffmpeg", "-i", wav_in, "-vn", "-acodec", "pcm_s16le",
            "-ar", str(bit_rate), "-ac", "2", wav_out,
        ]
        try:
            p = subprocess.Popen(ffmpeg_command,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        except OSError as exc:
            # Without a shell a missing ffmpeg binary raises instead of
            # yielding a non-zero exit status; keep reporting it as None.
            logging.error("could not start ffmpeg: %s", exc)
            return None
        output, err = p.communicate()
        if p.returncode != 0:
            logging.debug(output.decode("utf-8", errors="replace"))
            logging.error(err.decode("utf-8", errors="replace"))
            return None

        # ffmpeg wrote two channels; collapse the file to mono in place.
        sound = AudioSegment.from_wav(wav_out)
        sound = sound.set_channels(1)
        sound.export(wav_out, format="wav")
        return ToDecode(wav_out, Audio._media_duration(wav_out), corpus_id)
def download(url):
    """Download the audio of *url* as MP3 and split it into separate tracks.

    The video metadata is fetched first (without downloading), then the
    audio is downloaded and converted to ``largeout.mp3`` by the
    ``FFmpegExtractAudio`` post-processor.  The track list is parsed from
    ``catalystdesc.txt`` together with the total duration, and the loaded
    audio is handed to ``split_export`` along with the video title.
    """
    audio_postprocessor = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }
    options = {
        'format': 'bestaudio/best',
        'postprocessors': [audio_postprocessor],
        'outtmpl': 'largeout.webm',  # fixed name for the downloaded file
        'logger': MyLogger(),
        'progress_hooks': [my_hook],
    }
    with youtube_dl.YoutubeDL(options) as ydl:
        # Metadata-only pass first (much faster), then the real download.
        info = ydl.extract_info(url, download=False)
        description = info['description']  # only used by the alternative parser below
        total_duration = info['duration']
        video_title = info['title']
        ydl.download([url])
    entries = Parsers.catalystTextParse("catalystdesc.txt", total_duration)
    # entries = Parsers.dubstepParse(description, total_duration)
    print("Loading file...")
    # Load the converted audio produced by the post-processor.
    sound = AudioSegment.from_mp3("largeout.mp3")
    split_export(entries, sound, video_title)
Example #6
0
 def play_music(self, music_file):
     """
     Open a WAV music file and build the "queue" of chunks from it.

     The decoded audio is split with ``make_chunks`` using a chunk length
     of 1000 and stored in ``self._music_chunks``; afterwards
     ``self._is_music`` is set to ``True``.
     """
     music_audio = AudioSegment.from_wav(music_file)
     self._music_chunks = make_chunks(music_audio, 1000)
     self._is_music = True
    def formant(self, val, f0_v):
        '''
            Change formant.

            The audio is analysed with pyworld (f0, spectral envelope,
            aperiodicity), the spectral envelope is stretched along its
            second axis by ``val`` and the signal is re-synthesised with the
            f0 contour multiplied by ``f0_v``.  ``self.audio`` is replaced by
            the result.

            val : formant rate (column i of the new envelope is taken from
                  column int(i / val) of the original one)
            f0_v: f0 rate (factor applied to the f0 contour)

            Returns self, so calls can be chained.
        '''
        f_rate = self.audio.frame_rate
        # pydub samples --> np.array(float64), the dtype handed to pyworld
        np_arr = np.array(self.audio.get_array_of_samples(),
                          dtype=np.float64)
        _f0_val, _time = pyworld.dio(np_arr, f_rate)  # fundamental frequency
        spct = pyworld.cheaptrick(np_arr, _f0_val, _time, f_rate)  # spectral envelope
        aper = pyworld.d4c(np_arr, _f0_val, _time, f_rate)  # aperiodicity
        spct_b = np.zeros_like(spct)
        # NOTE(review): for val < 1 the source column int(i / val) can run
        # past the last column and raise IndexError -- confirm intended range.
        for i in range(spct_b.shape[1]):
            spct_b[:, i] = spct[:, int(i / val)]
        ef_audio = pyworld.synthesize(_f0_val * f0_v, spct_b, aper, f_rate)
        # Clip to the int16 range before casting: casting out-of-range floats
        # wraps around and produces loud clicks instead of saturating.
        pcm16 = np.iinfo(np.int16)
        ef_audio = np.clip(ef_audio, pcm16.min, pcm16.max)
        ef_audio = ef_audio.astype(np.int16).tobytes()

        new_audio = AudioSegment(
            ef_audio,
            sample_width=self.audio.sample_width,
            frame_rate=f_rate,
            channels=self.audio.channels,
        )
        self.audio = new_audio
        return self
Example #8
0
    def cache_from_local(self):
        """
        Load the audio data from a local file.

        The signals emitted here must match the ones sent by cache_from_url,
        so that external code can pick them up and continue in the same way.
        When no cover image is found for the album, the default cover image
        (LOGO) is used instead.
        The duration information of the song is filled in as well.
        """
        self.emit(SIGNAL('before_cache()'))
        if self.song_info is None:
            self.stop()
            return

        cached_cover = self.check_image_cache()
        self.image_data = cached_cover if cached_cover else QM_DEFAULT_ICON_DATA

        segment = AudioSegment.from_file(self.song_info.song_path)
        self.audio_segment = segment

        # Fill in two attributes: the length in whole seconds and the same
        # duration converted to a time string.
        seconds = segment.duration_seconds
        self.song_info.interval = int(round(seconds))
        self.song_info.length = seconds2time(seconds)

        self.emit(SIGNAL('caching()'))
        self.is_stop = True
        self.emit(SIGNAL('after_cache()'))
Example #9
0
 def play_music(self, music_file):
     """
     Open a WAV music file and turn it into the "queue" of chunks.

     ``self._music_chunks`` receives the result of ``make_chunks`` called
     with a chunk length of 1000, then ``self._is_music`` becomes ``True``.
     """
     loaded = AudioSegment.from_wav(music_file)
     chunk_queue = make_chunks(loaded, 1000)
     self._music_chunks = chunk_queue
     self._is_music = True
Example #10
0
 def play_speech(self, speech_file):
     """
     Open a WAV speech file and turn it into the "queue" of chunks.

     ``self._speaking_chunks`` receives the result of ``make_chunks`` called
     with a chunk length of 1000, then ``self._is_speaking`` becomes ``True``.
     """
     loaded = AudioSegment.from_wav(speech_file)
     chunk_queue = make_chunks(loaded, 1000)
     self._speaking_chunks = chunk_queue
     self._is_speaking = True
Example #11
0
def 获取单首歌曲特征(file):  #fetch_index_label
    '''Convert a music file to WAV if needed and extract its features.

    Example path: ./data/music/50 Cent - Ready For War.mp3

    A file whose extension is not "wav" is decoded as MP3 and a WAV copy is
    written next to it.  From the WAV data 13 MFCCs per frame are computed;
    the result is the per-coefficient mean followed by the main diagonal and
    all upper diagonals of the coefficient covariance matrix
    (13 + 91 = 104 values, assuming mfcc returns one row per frame).

    Returns the feature vector, or None when extraction fails (the error is
    printed).
    '''
    file_format = file.split('.')[-1].lower()  # extension of the song file
    file_name = file[:-(len(file_format) + 1)]  # path without the extension
    if file_format != 'wav':
        # Decode as MP3 and save the WAV version in the original folder.
        print('file_format:' + file_format)
        wav_path = file_name + '.wav'
        AudioSegment.from_file(file, format='mp3').export(wav_path, format='wav')
        file = wav_path
    try:
        # Extract the features of the WAV file.
        rate, data = wavfile.read(file)
        mm = np.transpose(mfcc(data, rate, numcep=13, nfft=2048))
        pieces = [np.mean(mm, axis=1)]  # mean of every coefficient
        mc = np.cov(mm)
        pieces.extend(np.diag(mc, i) for i in range(mm.shape[0]))
        # os.remove(file)
        return np.concatenate(pieces)
    except Exception as msg:
        print(msg)
    def low_pass_filter(self, fp, fs, g_pass, g_stop):
        '''
            Butterworth filter (low pass)

            fp     : pass-band edge frequency (divided by the Nyquist
                     frequency before being handed to scipy)
            fs     : stop-band edge frequency (normalised the same way)
            g_pass : third argument of scipy.signal.buttord
            g_stop : fourth argument of scipy.signal.buttord

            Returns a new AudioSegment with the filtered samples;
            self.audio itself is not modified.

            https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.butter.html
            https://watlab-blog.com/2019/04/30/scipy-lowpass/
        '''
        samples = np.array(self.audio.get_array_of_samples())
        channels = self.audio.channels
        frame_rate = self.audio.frame_rate

        fn = frame_rate / 2  # Nyquist frequency
        wp = fp / fn
        ws = fs / fn
        N, Wn = signal.buttord(wp, ws, g_pass, g_stop)
        b, a = signal.butter(N, Wn, 'low')
        sample_res = np.zeros(samples.shape[0], dtype=np.float64)

        # Apply the filter to every channel separately; channel i occupies
        # every `channels`-th sample starting at offset i.
        for i in range(channels):
            sample_res[i::channels] = signal.filtfilt(b, a, samples[i::channels])

        # Clip to the int16 range before casting: the filtered signal can
        # overshoot the input range, and casting out-of-range floats wraps.
        pcm16 = np.iinfo(np.int16)
        sample_res = np.clip(sample_res, pcm16.min, pcm16.max)
        res = AudioSegment(
            sample_res.astype(np.int16).tobytes(),
            sample_width=self.audio.sample_width,
            frame_rate=frame_rate,
            channels=channels,
        )
        return res
Example #13
0
def unscramble(bp, stories, suffix):
    """Rebuild the original audio of every ``old_name`` in a blueprint.

    :param bp: blueprint DataFrame, or the path of a CSV file to read it from
    :param stories: iterable of audio file paths; each is loaded and passed
        to ``aud_from_log`` as a keyword argument named after the path
    :param suffix: appended to each ``old_name`` to form the output file
        name; the text after its last '.' selects the export format
        ("wav" when there is none)
    """
    if isinstance(bp, str):
        bp = pd.read_csv(bp)
    # DataFrame.sort was removed from pandas; sort_values is its replacement.
    intact = bp.sort_values(['old_name', 'order'])
    stories = {story: AudioSegment.from_file(story) for story in stories}
    # str.split never returns an empty list, so the former IndexError
    # fallback could not trigger; pick the default explicitly instead.
    ftype = suffix.rpartition('.')[2] if '.' in suffix else ''
    ftype = ftype or "wav"
    # Group by a scalar key so that every group name is a plain value that
    # can be concatenated with the suffix.
    for ii, g in intact.groupby('old_name'):
        aud = aud_from_log(g, **stories)
        aud.export(ii + suffix, ftype)
Example #14
0
 def getVoice(self, text, filename):
     """
     Request synthesized speech for *text* and store it on disk.

     The response body is written to ``self.locpla + filename``; afterwards
     ``turing.mp3`` in the same location is converted to ``turing.wav``.

     :param text: text to synthesize (sent as the ``tex`` field)
     :param filename: name of the file the response is saved under
     """
     # 2. Submit the data to the REST endpoint.
     data = parse.urlencode([
         ('tex', text),
         ('lan', 'zh'),
         ('cuid', self.cu_id),
         ('ctp', 1),
         ('tok', self.token_str)
     ])
     request = Request(self.getvoice_url)
     # Context managers close the response and the file even when reading
     # or writing fails half-way.
     with urlopen(request, data=data.encode('utf-8')) as f, \
             open(self.locpla + filename, 'wb') as voice_fp:
         voice_fp.write(f.read())
     # NOTE(review): the conversion always reads "turing.mp3", whatever
     # *filename* was given -- confirm callers always pass that name.
     sound = AudioSegment.from_mp3(self.locpla + "turing.mp3")
     sound.export(self.locpla + 'turing.wav', format="wav")
Example #15
0
def get_second_part_wav(main_wav_path, start_time, end_time, part_wav_path):
    '''
    Slice an audio file and save the selected part; times are in seconds.
    :param main_wav_path: path of the source audio file (decoded as MP3)
    :param start_time:  start of the cut, truncated to whole seconds
    :param end_time:  end of the cut, truncated to whole seconds
    :param part_wav_path:  path the extracted part is written to (as WAV)
    :return: None
    '''
    # Segment slicing works on milliseconds, so convert both bounds first.
    begin_ms, finish_ms = [int(t) * 1000 for t in (start_time, end_time)]
    clip = AudioSegment.from_mp3(main_wav_path)[begin_ms:finish_ms]
    clip.export(part_wav_path, format="wav")
Example #16
0
def convert_audio(from_path, to_dir, from_format=None, to_format='wav'):
    """Convert audios to a specified audio format.

    :param from_path: a single audio file, or a directory whose regular
        files (non-recursive) are all converted; anything else converts
        nothing
    :param to_dir: existing directory that receives the converted files
    :param from_format: format passed to ``AudioSegment.from_file``
    :param to_format: export format, also used as the new file extension
    """
    assert os.path.isdir(to_dir)

    if os.path.isfile(from_path):
        from_files = [from_path]
    elif os.path.isdir(from_path):
        candidates = (
            os.path.join(from_path, name) for name in os.listdir(from_path)
        )
        from_files = [path for path in candidates if os.path.isfile(path)]
    else:
        from_files = []

    for from_file in tqdm(from_files):
        # Swap only the trailing extension.  str.replace() would also rewrite
        # the same text elsewhere in the name and, for files without an
        # extension, insert the new extension between every character.
        stem = os.path.splitext(os.path.basename(from_file))[0]
        to_path = os.path.join(to_dir, stem + '.' + to_format)
        audio = AudioSegment.from_file(from_file, from_format)
        audio.export(to_path, to_format)
Example #17
0
    def check_audio_cache(self):
        """
        Check whether the cache file exists and has been cached completely.

        The cache counts as usable when the file exists, is at least 1024
        bytes large and its decoded duration is longer than the song's
        ``interval``.
        :return: False or an AudioSegment object
        """
        cache_song = QM_DEFAULT_CACHE_PATH + str(self.song_info.filename)
        file_usable = (os.path.isfile(cache_song)
                       and os.path.getsize(cache_song) >= 1024)
        if not file_usable:
            return False

        audio_seg = AudioSegment.from_file(cache_song)

        # Compare both durations in milliseconds.
        expected_ms = int(self.song_info.interval) * 1000
        cached_ms = audio_seg.duration_seconds * 1000
        return audio_seg if cached_ms > expected_ms else False
Example #18
0
def make_audio(bp, stories, audout="default.wav", bpout="default.csv"):
    """Generate wav with concatenated segments from blueprint.

    :param bp: path of the blueprint CSV file
    :param stories: mapping of story key -> audio file path; every file is
        loaded and passed to ``aud_from_log`` under its key
    :param audout: output audio path; the text after its last '.' is used
        as the export format
    :param bpout: path the updated blueprint is written to as CSV
    """
    # print() with a single argument behaves the same on Python 2 and 3.
    print("----Loading Data----")
    df = pd.read_csv(bp)
    print(df)

    print("----Loading Audio----")
    story_dict = {
        key: AudioSegment.from_file(fname) for key, fname in stories.items()
    }

    print("----Exporting Audio----")
    audio = aud_from_log(df, **story_dict)
    audio.export(audout, format=audout.split('.')[-1])

    print("----Saving Blueprint For Export----")
    # Remember the source name of every row, then point all rows at the
    # newly written audio file.
    df['old_name'] = df['name']
    df['name'] = audout
    update_splits(df)
    df.to_csv(bpout)
Example #19
0
    def 對檔案讀(cls, 音檔所在):
        """Read an audio file and build an object from its parameters.

        The file at ``音檔所在`` is opened with the ``wave`` module to obtain
        the sample width, frame rate, channel count and raw frames.  When
        ``wave`` rejects the file with ``'unknown format: 65534'`` the file
        is read through pydub instead; every other ``wave.Error`` is
        re-raised.

        :param 音檔所在: path of the audio file
        :return: the result of ``cls.對參數轉(sample_width, frame_rate,
            channels, raw_data)``
        """
        try:
            with wave.open(音檔所在, 'rb') as wave音檔:
                一點幾位元組 = wave音檔.getsampwidth()
                一秒幾點 = wave音檔.getframerate()
                幾个聲道 = wave音檔.getnchannels()
                原始資料 = wave音檔.readframes(wave音檔.getnframes())
        except wave.Error as 錯誤:
            # Only the "unknown format: 65534" failure is handled here.
            # NOTE(review): 65534 looks like the WAVE_FORMAT_EXTENSIBLE tag
            # (0xFFFE) -- confirm.
            if 錯誤.args[0] != 'unknown format: 65534':
                raise

            # The two bare strings below are no-op notes.  Rough translation:
            # "importing here makes the following warning less likely to
            # appear" -- i.e. pydub is imported lazily, only on this path.
            '## Khn̄g tsia khah bē 有警告'
            "RuntimeWarning: Couldn't find ffmpeg or avconv"
            from pydub.audio_segment import AudioSegment
            pydub_wave = AudioSegment.from_wav(音檔所在)
            一點幾位元組 = pydub_wave.sample_width
            一秒幾點 = pydub_wave.frame_rate
            幾个聲道 = pydub_wave.channels
            原始資料 = pydub_wave.raw_data
        return cls.對參數轉(一點幾位元組, 一秒幾點, 幾个聲道, 原始資料)
Example #20
0
def convert_wav(file, counter):
    """Export a normalized mono WAV excerpt taken from the middle of *file*.

    The excerpt starts at the midpoint of the audio and is up to 15 seconds
    long.  It is written to ``<directory of file>/wav/<counter>.wav``; the
    ``wav`` directory is created when missing.

    :param file: path of the source audio; its extension (without the dot)
        is used as the input format
    :param counter: value used as the output file name
    :return: path of the exported WAV file
    """
    ext = os.path.splitext(file)[1][1:]
    parent = os.path.dirname(file)
    # A bare file name has an empty dirname; "" + "/wav" would point at the
    # filesystem root, so use a relative "wav" directory in that case.
    wav_dir = parent + "/wav" if parent else "wav"
    if not os.path.exists(wav_dir):
        os.mkdir(wav_dir)
    song = AudioSegment.from_file(file, ext)
    msecs = song.duration_seconds * 1000
    halfpoint = msecs / 2
    excerpt = song[halfpoint:(halfpoint + 15000)]
    sample_rate = excerpt.frame_rate
    # standard decibel range
    normal_db = [-32.0, -18.0]
    # normalize sample before stripping a channel to avoid clipping
    excerpt = normalize(excerpt, sample_rate, normal_db)
    # convert to mono
    excerpt = excerpt.set_channels(1)
    new_filename = wav_dir + "/" + str(counter) + ".wav"
    excerpt.export(new_filename, format="wav")
    return new_filename
Example #21
0
def 获取单首歌曲特征(file):  #fetch_index_label
    """Return the MFCC-based feature vector of one song.

    A file whose extension is not "wav" is decoded as MP3 and exported as a
    WAV file with the same base name first.  The features are the mean of
    each of the 13 MFCCs followed by every diagonal (offset 0 and upwards)
    of their covariance matrix.

    Returns the feature vector, or None when extraction fails (the error is
    printed).
    """
    extension = file.split('.')[-1].lower()  # format of the song
    base_name = file[:-(len(extension) + 1)]  # name of the song
    if extension != 'wav':
        decoded = AudioSegment.from_file(file, format='mp3')
        file = base_name + '.wav'
        decoded.export(file, format='wav')
    try:
        rate, data = wavfile.read(file)
        # Transpose so that every row holds one coefficient over all frames.
        coeffs = np.transpose(mfcc(data, rate, numcep=13, nfft=2048))
        means = np.mean(coeffs, axis=1)
        covariance = np.cov(coeffs)
        diagonals = [np.diag(covariance, k) for k in range(coeffs.shape[0])]
        # os.remove(file)
        return np.concatenate([means] + diagonals)
    except Exception as msg:
        print(msg)
def wav_split(path):
    """Extract the audio of *path* and split it into pieces of up to 60 s.

    The audio file is produced by ``vedio_to_wav``; the pieces are exported
    as ``part_sound_<i>.wav`` relative to the current working directory.

    :param path: path handed to ``vedio_to_wav``
    :return: list of the exported piece file names, in order
    """
    main_wav_path = vedio_to_wav(path)
    # The duration is used as milliseconds: it is divided by 1000 below to
    # obtain seconds.
    sound_len = int(
        MediaInfo.parse(main_wav_path).to_data()['tracks'][0]['duration'])
    sound = AudioSegment.from_mp3(main_wav_path)
    part_file_list = []
    min_ = sound_len / 1000  # duration in seconds
    # Audio of 60 s or less still yields one piece; previously `n` was left
    # unbound in that case and the loop below raised.
    n = 1
    if min_ > 60:
        n = int(min_ // 60)
        if n * 60 < min_:
            n += 1
    for i in range(n):
        # Each piece starts 1 ms after the 60 s boundary.
        start_time = i * 60 * 1000 + 1
        # Clamp against the duration in milliseconds (sound_len already is
        # in that unit).
        end_time = min((i + 1) * 60 * 1000, sound_len)
        word = sound[start_time:end_time]
        part_file_name = 'part_sound_{}.wav'.format(i)
        word.export(part_file_name, format="wav")
        part_file_list.append(part_file_name)
    return part_file_list
Example #23
0
from scrampy.splice import parse_splits, update_splits, aud_from_log, expand_data, insert_gaps
from pydub.audio_segment import AudioSegment

# TODO: move to setup function (in a class)

# Shared fixtures for the tests below: load the split log and the audio.
log = parse_splits('examples/aud/NTF/NotTheFall_sentence_splitpoints.txt')
#log['order'] = range(0, len(log))
audio = {'NTF': AudioSegment.from_mp3('examples/aud/NTF/NotTheFall.mp3')}

# "Shuffle" with a fixed order: swap the first two segments.
indx = [1,0]
#random.shuffle(indx)
new_log = log.iloc[indx].copy()

# Build the new audio by concatenating the segments in the new order.
# NOTE(review): `reduce` is not imported in the visible part of this file;
# it is only a builtin on Python 2 -- confirm.
audlist = aud_from_log(new_log, **audio)
out = reduce(lambda x,y: x + y, audlist)

# Update the start and end points on the new log, then restore the original
# segment order.
# NOTE(review): DataFrame.sort only exists in old pandas releases -- confirm
# the pinned version.
update_splits(new_log)
unscram_log = new_log.sort('order')

# Helper: slice `clip` to the start/end bounds stored in row `ii` of `log`.
get_clip = lambda clip, log, ii: clip[log['start'][ii] : log['end'][ii]]

def test_update_splits():
    """The first row of the updated splits starts at 0."""
    first_row = new_log.iloc[0]
    assert first_row['start'] == 0

def test_new_log_indx_order():
    """New log starts with segment 1 (i.e. the second segment)"""
Example #24
0
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
from pydub.audio_segment import AudioSegment#pydub是处理音乐文件的一个库
from scipy.io import wavfile
from python_speech_features import mfcc
import os
import sys
song =AudioSegment.from_file('./我们的纪念.mp3',format="mp3")  # load the MP3 first
# Split the song (disabled):
# song_split=song[-30*100:]
song.export("./我们的纪念.wav",format='wav')  # convert to WAV
rate,data=wavfile.read("./我们的纪念.wav")  # sample rate and sample data
print(rate)
print(data.shape)
mf_feat = mfcc(data,rate,numcep=13,nfft=2048)  # 13 cepstral coefficients, FFT size 2048
print(mf_feat.shape)
mm =np.mean(mf_feat,axis=0)  # mean over axis 0 (original note: implicitly captures correlation over time)
print(mm.shape)
mf = np.transpose(mf_feat)
mc = np.cov(mf)  # covariance matrix; its entries are the covariances between the features
print(mc.shape)
result = mm
# np.diag(mc, k) returns the k-th diagonal of the square matrix; k=0 is the main diagonal.
for k in range(len(mm)):
    result=np.append(result,np.diag(mc,k))
print(result.shape)


Example #25
0
                    fw.setsampwidth(sampwidth)
                    fw.setframerate(framerate)
                    fw.writeframes(wave_data[left * windows:right *
                                             windows].tostring())
                    fw.close()
                    i = nextleft_tem - 1
                    break
                j = right_tem + 1
        i = i + 1


if __name__ == '__main__':
    # Process every MP3 file found in <project_path>/resource/.
    os.chdir(project_path + '/resource/')
    for test_speech in glob.glob('*.mp3'):
        fr = AudioSegment.from_mp3(test_speech)
        # These names are module globals because this block runs at module
        # level.
        # NOTE(review): presumably read by speech_separate() -- confirm.
        framerate, nframes, sampwidth = fr.frame_rate, fr.frame_count(
        ), fr.sample_width
        # One mono segment per channel of the source file.
        index = fr.split_to_mono()
        num = 1
        # Output record: a text file named after the MP3, opened in
        # <project_path>/speech_separation/.
        file = open(
            project_path + '/speech_separation/' +
            os.path.splitext(os.path.basename(test_speech))[0] + '.txt', 'w')
        for mp3_data in index:
            exit_value = False
            mp3_data_array = mp3_data.get_array_of_samples()
            # wave_data = np.fromstring(mp3_data_array, dtype=np.short)
            speech_separate(mp3_data_array)
            num += 1  # counts the channels processed so far (starting at 1)
        file.close()
Example #26
0
import os
import sys
import pandas
from scipy.io import wavfile
from python_speech_features.base import mfcc
from pydub.audio_segment import AudioSegment


# Audio slicing: obtain part of an audio file by selecting a time range.
# Audio is timed in milliseconds, so all slice bounds are given in milliseconds.
AudioSegment.converter = r'../../software/ffmpeg/bin/ffmpeg.exe'
# song = AudioSegment.from_file('./a.MP3', format='MP3')  # in testing only WAV worked, MP3 did not
sound = AudioSegment.from_mp3("a.wav")    # in testing only WAV worked, MP3 did not
part = sound[21500:100000]  # from 21.5 s up to 100 s
part.export("./part.wav", format="wav")
print(sound)


# For larger audio files: combine minutes and seconds into a single offset,
# then do the splitting yourself.
def segeS(start_time, end_time):
    """Convert two "minutes:seconds" time stamps to millisecond offsets.

    :param start_time: start of the range, e.g. "0:35"
    :param end_time: end of the range, e.g. "0:38"
    :return: tuple ``(start_ms, end_ms)``; previously both values were
        computed and then discarded
    :raises ValueError: when a field is not an integer
    :raises IndexError: when a time stamp contains no ':'
    """
    def _to_ms(stamp):
        # "M:SS" -> (minutes * 60 + seconds) * 1000
        fields = stamp.split(":")
        return (int(fields[0]) * 60 + int(fields[1])) * 1000

    return _to_ms(start_time), _to_ms(end_time)

# https://blog.csdn.net/xuqingda/article/details/86540333
# python+ffmpeg实现wav文件切割
# 单文件切割成小文件,这样实现了数据容量的扩增。

import os
Example #27
0
#coding:utf-8

from pydub.audio_segment import AudioSegment#pydub是python中用户处理音频文件的一个库
from scipy.io import wavfile
from python_speech_features.base import mfcc #傅里叶变换+梅尔倒谱
import pandas as pd
import numpy as np
import sys


# mfcc combines two steps: a Fourier transform and the mel-cepstral coefficients.
song = AudioSegment.from_file('./data/灰姑娘.mp3', format = 'mp3')  # load the song
# song_split = song[-30*1000:]  # split the song (disabled)
song.export('./data/灰姑娘.wav', format= 'wav')  # MP3 -> WAV conversion
rate, data = wavfile.read('./data/灰姑娘.wav')  # sample rate and sample data
mf_feat = mfcc(data, rate, numcep = 13, nfft = 2048)  # 13 cepstral coefficients, FFT size 2048
# Original note: the larger numcep is, the slower this gets.
# Original note: 108 keys; below 1/4 cheerful, above 1/4 sad.

print(mf_feat)
print(mf_feat.shape)
# The script stops here; everything below is currently unreachable.
sys.exit(0)
# df = pd.DataFrame(mf_feat)
# df.to_csv('./mfFeat.csv')
# print(mf_feat)
# print(mf_feat.shape)
mm = np.mean(mf_feat, axis = 0)  # mean over axis 0 (original note: implicitly captures time-domain correlation)
mf = np.transpose(mf_feat)
mc = np.cov(mf)  # covariance matrix of the columns of the original mf_feat matrix
# print(mc)
result = mm
Example #28
0
    def cache_from_url(self):
        """
        Fetch the music data from a URL and cache it in the cache directory.

        Loading first checks whether the cache directory already holds a
        valid copy of the audio and of the cover image.  If so, the cached
        copy is used directly; otherwise the data is downloaded from the
        network and cached.

        Emits 'before_cache()' at the start, 'caching()' whenever
        ``self.audio_segment`` has been (re)loaded and 'after_cache()' at
        the end.  Exceptions are not propagated; they are collected in
        ``self.exception_list``.

        :return: None -- results are stored on the instance
            (``image_data``, ``audio_segment``, ``exception_list``)
        """
        self.emit(SIGNAL('before_cache()'))
        self.is_stop = False
        self.exception_list = []
        try:
            # Resolve the song and cover addresses first when no URL is known.
            if self.song_info.song_url is None:
                tencent = TencentProtocol()
                tencent.get_play_key(self.song_info)
                tencent.get_song_address(self.song_info)
                tencent.get_image_address(self.song_info)
                if tencent.has_exception:
                    self.exception_list += tencent.exception_list
                    raise tencent.exception_list[0]

            # Try the local cache for the cover image first; download and
            # cache it when it is not found.
            self.image_data = self.check_image_cache()
            if not self.image_data:
                # (no-op string below: "read the album cover from the
                # network and write it to the local cache file")
                """
                从网络读取专辑封面,并写入本地缓存文件
                """
                self.image_data = requests.get(
                    self.song_info.image_url).content
                cache_image_path = QM_DEFAULT_CACHE_PATH + str(
                    self.song_info.mid) + '.jpg'
                if os.path.isfile(cache_image_path):
                    os.remove(cache_image_path)
                with open(cache_image_path, 'wb') as cover_file:
                    cover_file.write(self.image_data)

            # Try the local cache for the audio file first; download and
            # cache it when it is not found.
            cache_audio = self.check_audio_cache()
            if isinstance(cache_audio, AudioSegment):
                # (no-op string below: "load the audio from the cache")
                """
                从缓存载入音频
                """
                self.audio_segment = cache_audio
                self.emit(SIGNAL('caching()'))
            else:
                # (no-op string below: "fetch the audio from the network and
                # write it to the local cache")
                """
                从网络缓存音频,并写入本地缓存
                """
                request = Request(self.song_info.song_url)
                pipe = urlopen(url=request, timeout=QM_TIMEOUT)

                cache_file = QM_DEFAULT_CACHE_PATH + str(
                    self.song_info.filename)
                if os.path.isfile(cache_file):
                    os.remove(cache_file)
                with open(cache_file, 'wb') as audio_file:
                    while True:
                        data = pipe.read(QM_BUFFER_SIZE)

                        # Stop when asked to, or when the stream is exhausted.
                        if self.is_stop or data is None or len(data) == 0:
                            audio_file.close()
                            break

                        # print 'wirte >>> ' + str(self.song_info.name) + ' >>> ' + filename
                        audio_file.write(data)
                        sleep(0.01)
                        # The partially written file is decoded again after
                        # every chunk and 'caching()' is emitted each time.
                        # NOTE(review): the file is not flushed before it is
                        # re-read by name -- confirm the decoder sees the
                        # latest chunk.
                        self.audio_segment = AudioSegment.from_file(
                            audio_file.name)
                        self.emit(SIGNAL('caching()'))
                    audio_file.close()
        # NOTE(review): both handlers append to `e.message`; exceptions only
        # carry that attribute on Python 2 -- confirm the target version.
        # The appended texts mean "runtime error." and "error while fetching
        # the music data." respectively.
        except RuntimeError as e:
            e.message += u"运行时错误。"
            self.exception_list.append(e)
        except BaseException as e:
            e.message += u"获取音乐数据错误。"
            self.exception_list.append(e)

        self.is_stop = True
        self.emit(SIGNAL('after_cache()'))
Example #29
0
def text2wav(text, language='en', filename='temp', tld='cn'):
    """Synthesize *text* with gTTS and store it as a 16000 Hz WAV file.

    The speech is first saved as ``<filename>.mp3`` and then re-exported as
    ``<filename>.wav`` with the frame rate set to 16000.

    :param text: text to synthesize
    :param language: value passed to gTTS as ``lang``
    :param filename: output path without extension
    :param tld: value passed to gTTS as ``tld``
    """
    speech = gTTS(text=text, tld=tld, lang=language)
    mp3_path = filename + ".mp3"
    speech.save(mp3_path)
    resampled = AudioSegment.from_mp3(mp3_path).set_frame_rate(16000)
    resampled.export(filename + ".wav", format="wav")
# _*_ coding: utf-8 _*_
# @Time     : 2018/4/19 下午3:42
# @Author   : yu yongsheng
# @FileName : channel_split_pydub2.py.py
# @Software :
# @Description: split_channel_pydub

import os
import time
import glob
import numpy as np
from pydub.audio_segment import AudioSegment

project_path = os.getcwd()

st = time.time()

# Split every MP3 in <project_path>/speech_separate_data/ into its left and
# right channel and store both as WAV files in <project_path>/channel_data/.
os.chdir(project_path + '/speech_separate_data/')
for path in glob.glob('*.mp3'):
    # print() with a single argument behaves the same on Python 2 and 3.
    print(path)
    mp3_file = AudioSegment.from_mp3(path)
    # mp3_file = mp3_file.set_frame_rate(22050)
    # each index in split_to_mono() represent a channel(0-left/1-right)
    index = mp3_file.split_to_mono()
    stem = os.path.splitext(os.path.basename(path))[0]
    index[0].export(project_path + "/channel_data/" + stem + '_left.wav',
                    format="wav")
    index[1].export(project_path + "/channel_data/" + stem + '_right.wav',
                    format="wav")
# Elapsed wall-clock time in seconds.
print(time.time() - st)
def mp2Wav(source_file_path, destin_path):
    """Decode the MP3 at *source_file_path* and write it to *destin_path* as WAV."""
    AudioSegment.from_mp3(source_file_path).export(destin_path, format='wav')
Example #32
0
"""speech data process.
"""

from pydub.audio_segment import AudioSegment

if __name__ == '__main__':
    # Load the sample recording and show its frame rate.
    audio = AudioSegment.from_file('102.wav')
    frame_rate = audio.frame_rate
    print(frame_rate)
Example #33
0
            encoded_samples.append(encoded_sample)
                
        encoded_audio = wave.open(watermarked_output_path, 'wb')
        encoded_audio.setparams( (nchannels, sampwidth, framerate, nframes, comptype, compname) )
        #设置参数
        encoded_audio.writeframes(struct.pack("%dh" % len(encoded_samples), *encoded_samples))
        #写入output路径
        
    def watermark_to_bits(self, watermark, nbits=8):
        """Expand every value of *watermark* into its lowest *nbits* bits.

        The bits of each value are listed least-significant bit first, and
        the values are processed in iteration order.

        :param watermark: iterable of integers (e.g. a ``bytes`` object)
        :param nbits: number of bits taken from every value
        :return: flat list of 0/1 integers
        """
        return [
            (byte & (2 ** i)) >> i
            for byte in watermark
            for i in range(0, nbits)
        ]
        
    
if __name__ == "__main__":
    # Command line: MESSAGE SOURCE_MP3 [OUTPUT_WAV]
    # The source path was previously read before any length check, so a
    # missing argument ended in an IndexError traceback.
    if len(sys.argv) < 3:
        sys.exit("usage: %s MESSAGE SOURCE_MP3 [OUTPUT_WAV]" % sys.argv[0])

    cover_audio = "ori.wav"
    message = sys.argv[1]
    source = sys.argv[2]
    output = sys.argv[3] if len(sys.argv) > 3 else "w.wav"

    # Convert the source MP3 to the WAV file used as the cover audio.
    sound = AudioSegment.from_mp3(source)
    sound.export(cover_audio, format='wav')

    # lsb_watermark(cover_audio, message, output)

    #print(recover_lsb_watermark(output))