Example 1
from os import environ
import urllib.request

# `plugin` is the module-level kutana Plugin instance, defined elsewhere
# in this file.


async def initiation(kutana):
    from spleeter.audio.adapter import get_default_audio_adapter
    from spleeter.separator import Separator

    environ["GITHUB_REPOSITORY"] = "bruce-willis/Audio-denoiser"
    environ["GITHUB_RELEASE"] = "v0.1"

    config_url = "https://raw.githubusercontent.com/bruce-willis/Audio-denoiser/develop/src/training/config/voice_config.json"
    config_path = "voice_config.json"
    _ = urllib.request.urlretrieve(url=config_url, filename=config_path)

    separator = Separator(config_path)
    # Build the predictor once at startup and cache it on the plugin.
    predictor = separator._get_predictor()
    plugin.predictor = predictor

    adapter = get_default_audio_adapter()
    plugin.adapter = adapter
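
A possible follow-up (not part of the original plugin): once `initiation` has cached the adapter and predictor, a handler can load a clip and run separation directly. The handler name is hypothetical, and the feed keys mirror spleeter 1.x internals, which may differ between versions.

async def handle_audio(plugin, path):
    # Load the clip with the cached adapter (returns waveform, sample rate).
    waveform, _ = plugin.adapter.load(path, sample_rate=44100)
    # Run the cached predictor; keys follow spleeter 1.x's internal contract.
    prediction = plugin.predictor({"waveform": waveform, "audio_id": ""})
    prediction.pop("audio_id", None)
    return prediction  # maps each stem name to a numpy waveform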
Example 2
import argparse

import numpy as np
import tensorflow as tf

from spleeter.audio.adapter import get_default_audio_adapter
from spleeter.separator import Separator

parser = argparse.ArgumentParser()
# The original listing starts mid-way through this parser setup; the
# --input/--output help texts below are assumed.
parser.add_argument('--input', type=str, help='input audio file')
parser.add_argument('--output', type=str, help='output audio file')
parser.add_argument('--params_filename', type=str,
                    default='spleeter:4stems',
                    help='model to attack')
parser.add_argument('--rms', type=float, default=0.01, help='attack strength')
args = parser.parse_args()

filename = args.input
outputname = args.output
if '/' not in outputname:
    outputname = './' + outputname
modelname = args.params_filename

# load model
print("load model")
separator = Separator(modelname, stft_backend='tensorflow')
separator._params['attack'] = 4
predictor = separator._get_predictor()

# load audio
print("load audio")
audio_loader = get_default_audio_adapter()
sample_rate = 44100
waveform, _ = audio_loader.load(filename, sample_rate=sample_rate)
print(waveform.dtype)
print("max amplitude: {}".format(np.max(np.abs(waveform))))

# compute spectrogram
print("compute stft")
frame_length = separator._params['frame_length']
frame_step = separator._params['frame_step']

with predictor.graph.as_default():
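    # Hypothetical continuation; the original listing is truncated here.
    # Inside the predictor's graph, an STFT can be built from the model's
    # frame parameters with the standard tf.signal ops.
    stft = tf.signal.stft(
        tf.transpose(waveform),  # spleeter waveforms are (samples, channels)
        frame_length=frame_length,
        frame_step=frame_step,
        window_fn=tf.signal.hann_window)
    spectrogram = tf.abs(stft)  # magnitude spectrogram, per channel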
Example 3
import json
import logging
import os
import warnings
from pathlib import Path

from inaSpeechSegmenter import Segmenter
from spleeter.separator import Separator

import buffer_utils  # local helper module for buffer <-> waveform conversion


class AudioDetect:
    def __init__(self, model_path_1, model_path_2):
        self.spleeter = Separator('spleeter:2stems', model_path_1)
        # Frequency-domain source separation; for vocal extraction two stems
        # suffice: accompaniment.wav is the backing track, vocals.wav the voice.
        self.spleeter._get_predictor()  # warm up the model at init time

        self.ina_speech_segmenter = Segmenter(detect_gender=False,
                                              model_dir=model_path_2)
        logging.info("init done")

    def file_base_name(self, file_path):
        return Path(file_path).resolve().stem

    def spleeter_vocals_file_name(self, input_file, output_dir):
        input_base_name = self.file_base_name(input_file)
        return output_dir + "/" + input_base_name + "/vocals.wav"

    def do_spleeter_from_buffer(self, input_buffer):
        waveform = buffer_utils.buffer_to_wave_for_spleeter(
            input_buffer, 44100)
        sources = self.spleeter.separate(waveform)
        return sources['vocals']

    def do_spleeter(self, input_file, out_dir):  # out_dir: directory for the separated stems
        self.spleeter.separate_to_file(
            input_file,
            out_dir,
            filename_format='{filename}/{instrument}.{codec}')
        return True

    def do_segment_from_buffer(self, input_buffer):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            mspec, loge, difflen = buffer_utils.feat_from_spleeter_vocals_for_segment_two_transcode(
                input_buffer)
            segmentation = self.ina_speech_segmenter.segment_feats(
                mspec, loge, difflen, 0)
        return (True, segmentation)

    def do_segment(self, input_file, output_dir):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            segmentation = self.ina_speech_segmenter(
                self.spleeter_vocals_file_name(input_file, output_dir))

        return (True, segmentation)

    def process_segmentation(self, result_dic, segmentation):
        # Merge consecutive segments that share a label and keep only the
        # "speech" runs as {type, startSec, endSec} entries.
        last_label = ""
        last_start = -1
        last_end = -1
        segments = []
        for segment in segmentation:
            label = self.map_label(segment[0])
            start = round(float(segment[1]), 2)
            end = round(float(segment[2]), 2)
            if last_label == "":
                last_label = label
                last_start = start
                last_end = end
                continue
            if last_label == label:
                last_end = end
                continue
            if last_label == "speech":
                segments.append({
                    "type": "speech",
                    "startSec": last_start,
                    "endSec": last_end
                })
            last_label = label
            last_start = start
            last_end = end

        # Flush the final run.
        if last_label == "speech":
            segments.append({
                "type": "speech",
                "startSec": last_start,
                "endSec": last_end
            })
        result_dic["segments"] = segments

    def map_label(self, label):
        # Both music and speech count as "speech" for this pipeline;
        # everything else is folded into "noEnergy".
        if label in ("music", "speech"):
            return "speech"
        return "noEnergy"

    def process_from_buffer(self, input_buffer, input_file):
        result_dic = {}
        input_base_name = os.path.basename(input_file)
        result_dic["fileName"] = input_base_name

        vocals_data = self.do_spleeter_from_buffer(input_buffer)
        if vocals_data is None:
            logging.error("separate failed")
            return json.dumps(result_dic, ensure_ascii=False)

        result, segmentation = self.do_segment_from_buffer(
            vocals_data)  # vocals_data must be 16 kHz here
        if not result:
            logging.error("segment failed")
            return json.dumps(result_dic, ensure_ascii=False)

        self.process_segmentation(result_dic, segmentation)
        return json.dumps(result_dic, ensure_ascii=False)

    def process(self, input_file, output_dir):
        result_dic = {}
        input_base_name = os.path.basename(input_file)
        result_dic["fileName"] = input_base_name

        if not self.do_spleeter(input_file, output_dir):  # step 1: split the stems
            logging.error("separate failed")
            return json.dumps(result_dic, ensure_ascii=False)

        result, segmentation = self.do_segment(input_file, output_dir)  # step 2: segment the vocals
        if not result:
            logging.error("segment failed")
            return json.dumps(result_dic, ensure_ascii=False)

        self.process_segmentation(result_dic, segmentation)
        return json.dumps(result_dic, ensure_ascii=False)
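
A minimal driver for the class above (the model paths and file names are placeholders, not from the original code):

if __name__ == "__main__":
    # Load both models once, then run the two-step pipeline on one file.
    detector = AudioDetect("pretrained_models/2stems", "ina_models")
    print(detector.process("input.wav", "separated"))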
Example 4
import logging

# Import the gRPC stubs generated by buildproto.sh.
import service.service_spec.sound_spleeter_pb2_grpc as grpc_bt_grpc
from service.service_spec.sound_spleeter_pb2 import Output

import service.sound_spleeter as ss

# TensorFlow (1.x API).
import tensorflow as tf

# Configure the session to allocate GPU memory on demand rather than
# grabbing it all up front.
tf_session_config = tf.ConfigProto()
tf_session_config.gpu_options.allow_growth = True
sess = tf.Session(config=tf_session_config)

from spleeter.separator import Separator
separator = Separator("spleeter:2stems")
separator._get_predictor()  # Hacky!


logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - [%(levelname)8s] - %(name)s - %(message)s")
log = logging.getLogger("sound_spleeter_service")


# Servicer added to the gRPC server; its base class comes from the
# generated protobuf code.
class SoundSpleeterServicer(grpc_bt_grpc.SoundSpleeterServicer):
    def __init__(self):
        # Just for debugging purposes.
        log.debug("SoundSpleeterServicer created")

    @staticmethod
    def spleeter(request, context):
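        # Hypothetical body; the listing is truncated here, and the actual
        # request/response fields come from sound_spleeter.proto. The idea:
        # delegate separation to the service module and wrap the stems in
        # the generated Output message.
        try:
            vocals, accomp = ss.spleeter(separator, request.audio)
            return Output(vocals=vocals, accomp=accomp)
        except Exception as e:
            log.error(e)
            context.set_details(str(e))
            return Output()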