async def initiation(kutana):
    """One-time plugin startup hook.

    Downloads the voice-denoiser Spleeter config, builds a Separator from it,
    and attaches the resulting predictor and a default audio adapter to the
    module-level ``plugin`` object for later request handling.

    Relies on module-level ``plugin``, ``environ`` and ``urllib``; performs a
    network download, so it needs connectivity. ``kutana`` is accepted by the
    framework's hook signature but not used here.
    """
    from spleeter.audio.adapter import get_default_audio_adapter
    from spleeter.separator import Separator

    # Point the model-download machinery at the denoiser GitHub release.
    environ["GITHUB_REPOSITORY"] = "bruce-willis/Audio-denoiser"
    environ["GITHUB_RELEASE"] = "v0.1"

    # Fetch the separator configuration next to the working directory.
    cfg_url = "https://raw.githubusercontent.com/bruce-willis/Audio-denoiser/develop/src/training/config/voice_config.json"
    cfg_path = "voice_config.json"
    urllib.request.urlretrieve(url=cfg_url, filename=cfg_path)

    # Build the separator, eagerly create its predictor (private spleeter API),
    # and hang both pieces off the plugin for the request handlers.
    separator = Separator(cfg_path)
    plugin.predictor = separator._get_predictor()
    plugin.adapter = get_default_audio_adapter()
default='spleeter:4stems', help='model to attack') parser.add_argument('--rms', type=float, default=0.01, help='attack strength') args = parser.parse_args() filename = args.input outputname = args.output if '/' not in outputname: outputname = './' + outputname modelname = args.params_filename # load model print("load model") separator = Separator(modelname, stft_backend='tensorflow') separator._params['attack'] = 4 predictor = separator._get_predictor() # load audio print("load audio") audio_loader = get_default_audio_adapter() sample_rate = 44100 waveform, _ = audio_loader.load(filename, sample_rate=sample_rate) print(waveform.dtype) print("max amplitude: {}".format(np.max(np.abs(waveform)))) # compute spectrogram print("compute stft") frame_length = separator._params['frame_length'] frame_step = separator._params['frame_step'] with predictor.graph.as_default():
class AudioDetect:
    """Speech-segment detection pipeline.

    Step 1: Spleeter separates the vocal track from the input audio.
    Step 2: inaSpeechSegmenter labels the vocal track, and consecutive
    speech-like rows are merged into a ``segments`` list that is returned
    as JSON by :meth:`process` / :meth:`process_from_buffer`.
    """

    def __init__(self, model_path_1, model_path_2):
        # Frequency-domain source separation; to extract the voice only the
        # 2-stem model is needed: accompaniment.wav is the extracted
        # backing/accompaniment, vocals.wav is the extracted voice.
        # NOTE(review): the second positional arg to Separator is presumably
        # the model location for this spleeter version — confirm.
        self.spleeter = Separator('spleeter:2stems', model_path_1)
        # Eagerly create the predictor (private spleeter API), presumably to
        # pay the model-loading cost at init time — confirm.
        self.spleeter._get_predictor()
        self.ina_speech_segmenter = Segmenter(detect_gender=False,
                                              model_dir=model_path_2)
        logging.info("init done")

    def file_base_name(self, file_path):
        # Base name without extension, e.g. "/a/b/c.mp3" -> "c".
        return Path(file_path).resolve().stem

    def spleeter_volcals_file_name(self, input_file, output_dir):
        # Path of the vocals file that separate_to_file writes for input_file.
        input_base_name = self.file_base_name(input_file)
        return output_dir + "/" + input_base_name + "/vocals.wav"

    # Get the vocals channel directly from an in-memory buffer.
    def do_spleeter_from_buffer(self, input_buffer):
        waveform = buffer_utils.buffer_to_wave_for_spleeter(
            input_buffer, 44100)  # 44.1 kHz expected by the separator
        sources = self.spleeter.separate(waveform)
        return sources['vocals']

    def do_spleeter(self, input_file, out_dir):
        # Write per-instrument tracks under out_dir (the split-track directory).
        # NOTE(review): the literal "(unknown)" in filename_format looks like a
        # mangled placeholder (probably a base-name format field) — verify.
        self.spleeter.separate_to_file(
            input_file, out_dir,
            filename_format='(unknown)/{instrument}.{codec}')
        return True

    def do_segment_from_buffer(self, input_buffer):
        # Segment vocals held in memory; returns (ok, segmentation rows).
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # the segmenter is warning-noisy
            mspec, loge, difflen = \
                buffer_utils.feat_from_spleeter_vocals_for_segment_two_transcode(
                    input_buffer)
            segmention = self.ina_speech_segmenter.segment_feats(
                mspec, loge, difflen, 0)
            return (True, segmention)

    def do_segment(self, input, output_dir):
        # Segment the vocals.wav previously written by do_spleeter.
        # ("input" shadows the builtin; kept as-is to preserve the interface.)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            segmention = self.ina_speech_segmenter(
                self.spleeter_volcals_file_name(input, output_dir))
            return (True, segmention)

    def process_segmention(self, result_dic, segmention):
        """Merge raw (label, start, end) rows into coalesced speech spans.

        Consecutive rows whose mapped label matches are merged into one run;
        only runs mapping to "speech" are emitted as {type, startSec, endSec}
        dicts. The list is stored under result_dic["segments"].
        """
        last_lable = ""   # sic: original spelling kept
        last_start = -1
        last_end = -1
        segments = []
        for segment in segmention:
            label = segment[0]
            label = self.map_label(label)
            start = round(float(segment[1]), 2)  # seconds, 2-decimal precision
            end = round(float(segment[2]), 2)
            if last_lable == "":
                # First row: open a run.
                last_lable = label
                last_start = start
                last_end = end
                continue
            if last_lable == label:
                # Same label: extend the current run.
                last_end = end
                continue
            else:
                # Label changed: flush the finished run if it was speech.
                if last_lable == "speech":
                    segments.append({
                        "type": "speech",
                        "startSec": last_start,
                        "endSec": last_end
                    })
                last_lable = label
                last_start = start
                last_end = end
        # Flush the trailing run.
        if last_lable == "speech":
            segments.append({
                "type": "speech",
                "startSec": last_start,
                "endSec": last_end
            })
        result_dic["segments"] = segments

    def map_label(self, label):
        # Collapse segmenter labels to a binary speech / noEnergy scheme.
        # NOTE(review): "music" is treated as speech here — presumably
        # intentional (music implies audible content); confirm.
        speech_labels = ["music", "speech"]
        if label in speech_labels:
            return "speech"
        return "noEnergy"

    def process_from_buffer(self, input_buffer, input_file):
        """Full in-memory pipeline; returns a JSON string with fileName/segments."""
        result_dic = {}
        result_dic.clear()  # redundant (dict is already empty); kept as-is
        input_base_name = os.path.basename(input_file)
        result_dic["fileName"] = input_base_name
        vocals_data = self.do_spleeter_from_buffer(input_buffer)
        if vocals_data is None:
            logging.error("separate failed")
            return json.dumps(result_dic, ensure_ascii=False)
        result, segmention = self.do_segment_from_buffer(
            vocals_data)  # make sure vocals_data is 16kHz
        if not result:
            logging.error("segment failed")
            return json.dumps(result_dic, ensure_ascii=False)
        self.process_segmention(result_dic, segmention)
        return json.dumps(result_dic, ensure_ascii=False)

    def process(self, input, output):
        """File-based pipeline: separate (step 1), segment (step 2), merge."""
        result_dic = {}
        result_dic.clear()  # redundant (dict is already empty); kept as-is
        input_base_name = os.path.basename(input)
        result_dic["fileName"] = input_base_name
        if not self.do_spleeter(input, output):  ### step 1
            logging.error("separate failed")
            return json.dumps(result_dic, ensure_ascii=False)
        result, segmention = self.do_segment(input, output)  ### step 2
        if not result:
            logging.error("segment failed")
            return json.dumps(result_dic, ensure_ascii=False)
        self.process_segmention(result_dic, segmention)
        return json.dumps(result_dic, ensure_ascii=False)
# Importing the generated codes from buildproto.sh import service.service_spec.sound_spleeter_pb2_grpc as grpc_bt_grpc from service.service_spec.sound_spleeter_pb2 import Output import service.sound_spleeter as ss # TensorFlow. import tensorflow as tf # Using session to consume less GPU memory tf_session_config = tf.ConfigProto() tf_session_config.gpu_options.allow_growth = True sess = tf.Session(config=tf_session_config) from spleeter.separator import Separator separator = Separator("spleeter:2stems") separator._get_predictor() # Hacky! logging.basicConfig(level=10, format="%(asctime)s - [%(levelname)8s] - %(name)s - %(message)s") log = logging.getLogger("sound_spleeter_service") # Create a class to be added to the gRPC server # derived from the protobuf codes. class SoundSpleeterServicer(grpc_bt_grpc.SoundSpleeterServicer): def __init__(self): # Just for debugging purpose. log.debug("SoundSpleeterServicer created") @staticmethod def spleeter(request, context):