Ejemplo n.º 1
0
def AUDIO(s, audios, intervals, times, gain=None, epsilon=.4):
	"""Attach a slice of each audio file to the scene's file writer.

	:param s: scene whose ``renderer.file_writer`` receives the segments
	:param audios: iterable of mp3 file paths
	:param intervals: per-file ``(start, end)`` pairs, in seconds
	:param times: per-file insertion time passed to ``add_audio_segment``
	:param gain: gain applied to every segment except the first, which
		keeps the writer's default (preserves the original behavior)
	:param epsilon: padding, in seconds, added to each interval's end
	"""
	for idx, audio in enumerate(audios):
		start, end = intervals[idx]
		# pydub slices in milliseconds; pad the end by epsilon seconds.
		# (The original duplicated this expression in both branches.)
		segment = AudioSegment.from_mp3(audio)[1000 * start:1000 * (end + epsilon)]
		if idx == 0:
			# First segment: the gain argument is deliberately omitted.
			s.renderer.file_writer.add_audio_segment(segment, times[idx])
		else:
			s.renderer.file_writer.add_audio_segment(segment, times[idx], gain)
def download(url):
    """Download a video's audio track as mp3 and split it into parts."""
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        # ffmpeg post-processing replaces the .webm extension with .mp3,
        # which is why "largeout.mp3" is loaded below.
        'outtmpl': 'largeout.webm',
        'logger': MyLogger(),
        'progress_hooks': [my_hook],
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        # Query metadata first without downloading -- much faster.
        info = ydl.extract_info(url, download=False)
        description = info['description']
        total_duration = info['duration']
        yt_name = info['title']
        ydl.download([url])
    entries = Parsers.catalystTextParse("catalystdesc.txt", total_duration)
    #entries = Parsers.dubstepParse(description, total_duration)
    print("Loading file...")
    # Load the extracted audio and export the individual segments.
    sound = AudioSegment.from_mp3("largeout.mp3")
    split_export(entries, sound, yt_name)
Ejemplo n.º 3
0
def get_second_part_wav(main_wav_path, start_time, end_time, part_wav_path):
    """Cut a clip out of an audio file and export it as WAV.

    Times are given in seconds.  Fractional seconds are now supported
    (generalized: the original truncated with ``int()``, which also
    rejected strings like ``"1.5"``).

    :param main_wav_path: path of the source audio file
    :param start_time: clip start, in seconds
    :param end_time: clip end, in seconds
    :param part_wav_path: path the extracted clip is written to
    :return: None
    """
    # pydub slices in milliseconds.
    start_ms = int(float(start_time) * 1000)
    end_ms = int(float(end_time) * 1000)

    sound = AudioSegment.from_mp3(main_wav_path)
    clip = sound[start_ms:end_ms]

    clip.export(part_wav_path, format="wav")
Ejemplo n.º 4
0
 def getVoice(self, text, filename):
     """Fetch synthesized speech for *text* from the TTS REST endpoint.

     Writes the raw response body to ``self.locpla + filename``, then
     converts ``turing.mp3`` in the same directory to ``turing.wav``.

     NOTE(review): the conversion step ignores *filename* and always
     reads the hard-coded ``turing.mp3`` -- confirm callers always pass
     ``filename == "turing.mp3"``.

     :param text: text to synthesize
     :param filename: output file name, relative to ``self.locpla``
     """
     data = parse.urlencode([
         ('tex', text),
         ('lan', 'zh'),
         ('cuid', self.cu_id),
         ('ctp', 1),
         ('tok', self.token_str)
     ])
     # Context managers guarantee both handles are closed even if an
     # exception is raised mid-transfer (the original leaked them).
     with urlopen(Request(self.getvoice_url), data=data.encode('utf-8')) as f:
         with open(self.locpla + filename, 'wb') as voice_fp:
             voice_fp.write(f.read())
     sound = AudioSegment.from_mp3(self.locpla + "turing.mp3")
     sound.export(self.locpla + 'turing.wav', format="wav")
def wav_split(path):
    """Split the audio track of *path* into consecutive <=60 s WAV chunks.

    :param path: video file; its audio is first extracted via ``vedio_to_wav``
    :return: list of the exported chunk file names (written to the cwd)
    """
    file = vedio_to_wav(path)
    main_wav_path = file
    path = os.path.dirname(file) + '/'
    # MediaInfo reports track duration in milliseconds -- TODO confirm
    # against the installed pymediainfo version.
    sound_len = int(
        MediaInfo.parse(main_wav_path).to_data()['tracks'][0]['duration'])
    sound = AudioSegment.from_mp3(main_wav_path)
    part_file_list = list()
    # Number of 60-second chunks, rounded up, at least one.
    # Bug fix: `n` used to be undefined (NameError) for clips of 60
    # seconds or less, because it was only assigned inside `if min_ > 60`.
    n = max(1, -(-sound_len // 60000))
    for i in range(n):
        start_time = i * 60 * 1000 + 1
        end_time = (i + 1) * 60 * 1000
        # Bug fix: sound_len is already in ms; the original compared
        # against sound_len * 1000, so the clamp could never trigger.
        if end_time > sound_len:
            end_time = sound_len
        word = sound[start_time:end_time]
        part_file_name = 'part_sound_{}.wav'.format(i)
        word.export(part_file_name, format="wav")
        part_file_list.append(part_file_name)
    return part_file_list
Ejemplo n.º 6
0
from scrampy.splice import parse_splits, update_splits, aud_from_log, expand_data, insert_gaps
from pydub.audio_segment import AudioSegment

# TODO: move to setup function (in a class)

# Load the split-point log and the audio file it refers to.
log = parse_splits('examples/aud/NTF/NotTheFall_sentence_splitpoints.txt')
#log['order'] = range(0, len(log))
audio = {'NTF': AudioSegment.from_mp3('examples/aud/NTF/NotTheFall.mp3')}

# Fixed "shuffle": swap the first two segments (deterministic for the tests).
indx = [1,0]
#random.shuffle(indx)
new_log = log.iloc[indx].copy()

# Rebuild audio in the shuffled order and concatenate the segments.
# NOTE(review): `reduce` is a builtin only on Python 2; on Python 3 it
# must be imported from functools -- confirm the target interpreter.
audlist = aud_from_log(new_log, **audio)
out = reduce(lambda x,y: x + y, audlist)

# update start and end points on new log
update_splits(new_log)
# NOTE(review): DataFrame.sort() was removed in pandas 0.20+; newer
# pandas requires sort_values('order') -- confirm the pinned version.
unscram_log = new_log.sort('order')

# Slice a clip out of `clip` using row ii's start/end columns of `log`.
get_clip = lambda clip, log, ii: clip[log['start'][ii] : log['end'][ii]]

def test_update_splits():
    """First row of the updated splits has a start value of 0."""
    first_row = new_log.iloc[0]
    assert first_row['start'] == 0

def test_new_log_indx_order():
    """New log starts with segment 1 (i.e. the second segment)"""
Ejemplo n.º 7
0
def text2wav(text, language='en', filename='temp', tld='cn'):
    """Synthesize *text* with gTTS and convert the mp3 to a 16 kHz wav."""
    mp3_path = filename + ".mp3"
    gTTS(text=text, tld=tld, lang=language).save(mp3_path)
    speech = AudioSegment.from_mp3(mp3_path).set_frame_rate(16000)
    speech.export(filename + ".wav", format="wav")
Ejemplo n.º 8
0
                    fw.setsampwidth(sampwidth)
                    fw.setframerate(framerate)
                    fw.writeframes(wave_data[left * windows:right *
                                             windows].tostring())
                    fw.close()
                    i = nextleft_tem - 1
                    break
                j = right_tem + 1
        i = i + 1


if __name__ == '__main__':
    # Input: mono-channel wav/mp3 files in the project's resource directory.
    os.chdir(project_path + '/resource/')
    for test_speech in glob.glob('*.mp3'):
        fr = AudioSegment.from_mp3(test_speech)
        framerate, nframes, sampwidth = fr.frame_rate, fr.frame_count(
        ), fr.sample_width
        index = fr.split_to_mono()
        num = 1
        # Output record, one .txt per input file.  Bug fix: the handle was
        # opened without `with`, so it leaked if speech_separate raised.
        # The name `file` is kept because an unseen helper may rely on it
        # as a global -- TODO confirm and rename (it shadows a builtin on
        # Python 2).
        record_path = (project_path + '/speech_separation/' +
                       os.path.splitext(os.path.basename(test_speech))[0] +
                       '.txt')
        with open(record_path, 'w') as file:
            for mp3_data in index:
                exit_value = False
                mp3_data_array = mp3_data.get_array_of_samples()
                # wave_data = np.fromstring(mp3_data_array, dtype=np.short)
                speech_separate(mp3_data_array)
                num += 1
Ejemplo n.º 9
0
def mp2Wav(source_file_path, destin_path):
    """Convert an mp3 file to wav format."""
    AudioSegment.from_mp3(source_file_path).export(destin_path, format='wav')
Ejemplo n.º 10
0
            encoded_samples.append(encoded_sample)
                
        encoded_audio = wave.open(watermarked_output_path, 'wb')
        encoded_audio.setparams( (nchannels, sampwidth, framerate, nframes, comptype, compname) )
        #设置参数
        encoded_audio.writeframes(struct.pack("%dh" % len(encoded_samples), *encoded_samples))
        #写入output路径
        
    def watermark_to_bits(self, watermark, nbits=8):
        """Flatten *watermark* into a list of bits, least-significant first.

        :param watermark: iterable of integers (e.g. a ``bytes`` object)
        :param nbits: number of bits taken from each byte
        :return: flat list of 0/1 ints, ``nbits`` per input byte
        """
        return [(byte >> bit) & 1
                for byte in watermark
                for bit in range(nbits)]
        
    
if __name__ == "__main__":
    # usage: script.py <message> <source-audio> [output]
    cover_audio = "ori.wav"
    output = "w.wav"
    # Bug fix: sys.argv[2] was read unconditionally, before any length
    # check, so running with fewer than two arguments crashed with an
    # IndexError instead of a usable error message.
    if len(sys.argv) < 3:
        sys.exit("usage: %s <message> <source-audio> [output]" % sys.argv[0])
    message = sys.argv[1]
    source = sys.argv[2]
    if len(sys.argv) > 3:
        output = sys.argv[3]
    # Convert the source audio to wav for use as the watermark cover.
    sound = AudioSegment.from_mp3(source)
    sound.export(cover_audio, format = 'wav')

    # lsb_watermark(cover_audio, message, output)

    #print(recover_lsb_watermark(output))
Ejemplo n.º 11
0
import os
import sys
import pandas
from scipy.io import wavfile
from python_speech_features.base import mfcc
from pydub.audio_segment import AudioSegment


# Audio clipping: extract part of an audio file by time range.
# pydub measures time in milliseconds, so the slice below is in ms.
AudioSegment.converter = r'../../software/ffmpeg/bin/ffmpeg.exe'
# song = AudioSegment.from_file('./a.MP3', format='MP3')  # in testing only wav worked, not MP3
sound = AudioSegment.from_mp3("a.wav")    # in testing only wav worked, not MP3
part = sound[21500:100000]
part.export("./part.wav", format="wav")
print(sound)


# 比较大的音频文件,将分钟和秒进行结合,然后在自己拆分
def segeS(start_time, end_time):
	"""Convert "M:SS"-style timestamps into milliseconds.

	For larger audio files, times are given as minute:second strings,
	e.g. start_time = "0:35", end_time = "0:38".

	:param start_time: start timestamp, "minutes:seconds"
	:param end_time: end timestamp, "minutes:seconds"
	:return: ``(start_ms, end_ms)`` tuple.  Bug fix: the original
		computed both values and then discarded them (returned None).
	"""
	minutes, seconds = start_time.split(":")
	start_ms = (int(minutes) * 60 + int(seconds)) * 1000
	minutes, seconds = end_time.split(":")
	end_ms = (int(minutes) * 60 + int(seconds)) * 1000
	return start_ms, end_ms

# https://blog.csdn.net/xuqingda/article/details/86540333
# python+ffmpeg实现wav文件切割
# 单文件切割成小文件,这样实现了数据容量的扩增。

import os
# _*_ coding: utf-8 _*_
# @Time     : 2018/4/19 下午3:42
# @Author   : yu yongsheng
# @FileName : channel_split_pydub2.py.py
# @Software :
# @Description: split_channel_pydub

import os
import time
import glob
import numpy as np
from pydub.audio_segment import AudioSegment

project_path = os.getcwd()

st = time.time()

os.chdir(project_path + '/speech_separate_data/')
for path in glob.glob('*.mp3'):
    # Bug fix: the bare `print path` statement is a syntax error under
    # Python 3 (which the rest of this code base uses); the parenthesized
    # form behaves identically on both Python 2 and 3 for a single value.
    print(path)
    mp3_file = AudioSegment.from_mp3(path)
    # mp3_file = mp3_file.set_frame_rate(22050)
    # each index in split_to_mono() represent a channel(0-left/1-right)
    index = mp3_file.split_to_mono()
    index[0].export(project_path + "/channel_data/" +
                    os.path.splitext(os.path.basename(path))[0] + '_left.wav',
                    format="wav")
    index[1].export(project_path + "/channel_data/" +
                    os.path.splitext(os.path.basename(path))[0] + '_right.wav',
                    format="wav")
print(time.time() - st)