Example #1
 def test_batch(self):
     seg = Segmenter(vad_engine='sm')
     with tempfile.TemporaryDirectory() as tmpdirname:
         lout = [os.path.join(tmpdirname, '1.csv'), os.path.join(tmpdirname, '2.csv')]
         ret = seg.batch_process(['./media/musanmix.mp3', './media/musanmix.mp3'], lout)
         self.assertTrue(filecmp.cmp(lout[0], lout[1]))
         self.assertTrue(filecmp.cmp(lout[0], './media/musanmix-sm-gender.csv'))
Example #2
    def __init__(self, model_path_1, model_path_2):
        self.spleeter = Separator('spleeter:2stems', model_path_1)
        # Frequency-domain source separation; isolating vocals generally needs only 2 stems: accompaniment.wav is the extracted background/accompaniment, vocals.wav the extracted vocals
        self.spleeter._get_predictor()

        self.ina_speech_segmenter = Segmenter(detect_gender=False,
                                              model_dir=model_path_2)
        logging.info("init done")
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-g", "--detect_gender", type=bool, default=False, help="Enable gender detection")
    parser.add_argument("-d", "--vad_engine", choices=['sm', 'smn'], default='smn', help="Voice activity detection:  smn (default) or sm")
    parser.add_argument("-b", "--ffmpeg_binary", default='ffmpeg', help="FFMPEG binary")
    parser.add_argument("input", help="Input file")
    parser.add_argument("output", help="Output file")
    args = parser.parse_args()

    seg = Segmenter(vad_engine=args.vad_engine, detect_gender=args.detect_gender, ffmpeg=args.ffmpeg_binary)
    seg.batch_process([args.input], [args.output], verbose=True)
Example #4
def run_inaseg(input_wav, csv_out_dir):
    # load the neural network into memory; this may take a few seconds
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=MatplotlibDeprecationWarning)
        from inaSpeechSegmenter import Segmenter
    seg = Segmenter(vad_engine="smn", detect_gender=True)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        stem = input_wav.stem
        input_files = [str(input_wav)]
        output_files = [str(csv_out_dir / f"{stem}.csv")]
        seg.batch_process(input_files, output_files, verbose=True)
    #subprocess.run(["ina_speech_segmenter.py", "-i", input_wav, "-o", csv_out_dir])
    return get_csv_path(input_wav, csv_out_dir)
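A usage sketch for run_inaseg (the file names are hypothetical; both arguments are pathlib.Path objects, and get_csv_path is defined elsewhere in the original project):

from pathlib import Path

# hypothetical paths: segments interview.wav and writes out/interview.csv
csv_path = run_inaseg(Path("interview.wav"), Path("out"))
print(csv_path)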
Example #5
def split(file_name, out_dir):
    print('\nREMOVE MUSIC AND CUT')
    seg = Segmenter()
    segmentation = seg(file_name)
    sample_rate, raw_audio = scipy.io.wavfile.read(file_name)
    #raw_audio , sr = librosa.load(file_name, sr=16000)
    speech = []
    print(segmentation)
    count = 1
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    list_file = []
    for s in segmentation:
        if s[0] != 'Music' and s[0] != 'NOACTIVITY':
            print(str(count), 'segment duration:', s[2] - s[1])
            # pad the segment by a quarter second on each side,
            # clamping the start so the slice cannot go negative
            start = max(0, int(s[1] * sample_rate) - int(sample_rate / 4))
            stop = int(s[2] * sample_rate) + int(sample_rate / 4)
            speech_data = np.array(raw_audio[start:stop])

            print(len(speech_data), len(speech_data) / sample_rate)
            if len(speech_data) / sample_rate < 0.5 or len(
                    speech_data) / sample_rate > 20:
                continue
            else:
                out_filename = out_dir + '/' + file_name.split(
                    '/')[-1].replace('.wav', '') + '_' + str(count) + '.wav'
                list_file.append(out_filename)
                scipy.io.wavfile.write(out_filename, sample_rate, speech_data)
                count += 1
    return list_file
Example #6
 def __init__(self, filename):
     self.filename = filename
     self.segmenter = Segmenter(vad_engine='smn',
                                detect_gender=False,
                                ffmpeg='ffmpeg')
     self._find_music()
     self._trim()
Example #7
def removeMusicAndCut(file_name, out_dir):
    print('\nREMOVE MUSIC AND CUT')
    seg = Segmenter()
    segmentation = seg(file_name)
    sample_rate, raw_audio = scipy.io.wavfile.read(file_name)
    speech = []
    print(segmentation)
    count = 1
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    for s in segmentation:
        if s[0] != 'Music' and s[0] != 'NOACTIVITY':
            #speech.append(s)
            print(str(count), 'segment duration:', s[2] - s[1])
            speech_data = raw_audio[int(s[1] * sample_rate):int(s[2] *
                                                                sample_rate)]
            speech_data = np.array(speech_data)

            print(len(speech_data), len(speech_data) / sample_rate)
            if len(speech_data) / sample_rate < 1.0 or len(
                    speech_data) / sample_rate > 10:
                continue
            else:
                scipy.io.wavfile.write(
                    out_dir + '/' +
                    file_name.split('/')[-1].replace('.wav', '') + '_' +
                    str(count) + '.wav', sample_rate, speech_data)
                count += 1
Example #8
def getclips(media):
    seg = Segmenter()
    segmentation = seg(media)
    print("all seg", segmentation)
    timestamp = []
    for i in segmentation:
        if i[0] == 'music':
            print("find out! music", i)
            if len(timestamp):
                if i[2] - timestamp[0] < 35:
                    print("Too short, wait more", i[2], timestamp[0],
                          i[2] - timestamp[0])
                    continue
                elif i[2] - timestamp[0] > 55:
                    print("Too long")
                    break
                timestamp[1] = i[2]
            else:
                timestamp.append(i[1])
                timestamp.append(i[2])

    print("music", timestamp)
    for i in timestamp:
        print(int(i))

    newname = "%s.mp3" % (datetime.datetime.now().strftime("%Y%m%d"))
    #print("ffmpeg -ss %d -i %s -to %d %s" % ( timestamp[0], media, int(timestamp[1]-timestamp[0])+1, newname))
    print("ffmpeg -ss %f -i %s -to %f %s -y " %
          (timestamp[0], media, timestamp[1] - timestamp[0], newname))
    os.system("ffmpeg -ss %f -i %s -to %f %s -y" %
              (timestamp[0], media, (timestamp[1] - timestamp[0]), newname))
    os.system("/usr/local/bin/telegram-send --caption %s --file %s" %
              (datetime.datetime.now().strftime("%Y%m%d"), newname))
Example #9
 def test_stopsec(self):
     # test stop_sec argument
     seg = Segmenter()
     stop_sec = 5.
     for lab, start, stop in seg('./media/musanmix.mp3', stop_sec=stop_sec):
         self.assertLessEqual(stop, stop_sec)
         self.assertLessEqual(start, stop_sec)
Example #10
 def test_processingresult(self):
     seg = Segmenter(vad_engine='sm')
     ret = seg('./media/musanmix.mp3')
     df = pd.read_csv('./media/musanmix-sm-gender.csv', sep='\t')
     ref = [(l.labels, float(l.start), float(l.stop)) for _, l in df.iterrows()]
     self.assertEqual([e[0] for e in ref], [e[0] for e in ret])
     np.testing.assert_almost_equal([e[1] for e in ref], [e[1] for e in ret])
     np.testing.assert_almost_equal([e[2] for e in ref], [e[2] for e in ret])
Example #11
def recognize(audioFile):
    gender = []
    seg = Segmenter()
    segmentation = seg(audioFile)
    for i in segmentation:
        if 'noEnergy' not in i[0]:
            gender.append(i[0])
    return (max(set(gender), key=gender.count))
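A minimal usage sketch for recognize (the file name is hypothetical); the call returns the most frequent gender label across the speech segments:

# hypothetical input file; prints e.g. 'male' or 'female'
print(recognize('interview.wav'))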
Example #12
def _iina_segmentation(input_file):
    seg = Segmenter()
    segmentation = seg(input_file)
    result = []
    for segment in segmentation:
        if segment[0] not in ('energy', 'noEnergy', 'noise', 'music'):
            result.append((segment[1], segment[2]))

    return result
Example #13
def wav2seg(args, input_files):
    segmentations = []
    detect_gender = bool(distutils.util.strtobool(args.detect_gender))
    seg = Segmenter(vad_engine=args.vad_engine,
                    detect_gender=detect_gender,
                    ffmpeg=args.ffmpeg_binary)
    for input_file in input_files:
        segmentations += seg(input_file)

    print(segmentations)
    return segmentations
Example #14
def main():
    list_result = []
    seg = Segmenter(detect_gender=True)
    ext = "." + str(sys.argv[1])
    user = str(sys.argv[2])
    Path = os.getcwd()+"/CorpusM/*"
    folders = getFolders(Path)
    folders = sorted(folders)
    Edades = Range_old(folders, ext)
    Generos = Gender(folders, ext,seg)
    data = Reporte(folders,Generos,Edades,'Mexico')
    WriteResult(data,user) 
Example #15
    def test_boundaries(self):

        def seg2str(iseg, tseg):
            label, start, stop  = tseg
            return 'seg %d <%s, %f, %f>' % (iseg, label, start, stop)
        
        seg = Segmenter()
        ret = seg('./media/musanmix.mp3')
        for i in range(len(ret) -1):
            curstop = ret[i][2]
            nextstart = ret[i+1][1]
            self.assertEqual(curstop, nextstart,
                             '%s VS %s' % (seg2str(i, ret[i]), seg2str(i+1, ret[i+1])))
Example #16
 def test_processingresult(self):
     seg = Segmenter(vad_engine='sm')
     ret = seg('./media/musanmix.mp3')
     ref = [('music', 0.0, 22.48), ('noEnergy', 22.48, 29.080000000000002),
            ('male', 29.080000000000002, 32.480000000000004),
            ('music', 32.480000000000004, 52.800000000000004),
            ('noEnergy', 52.800000000000004, 54.78),
            ('music', 54.78, 55.74), ('noEnergy', 55.74, 63.34),
            ('male', 63.34, 68.26), ('noEnergy', 68.26, 68.92),
            ('male', 68.92, 71.60000000000001),
            ('noEnergy', 71.60000000000001, 72.0),
            ('male', 72.0, 73.82000000000001),
            ('noEnergy', 73.82000000000001, 74.5)]
     self.assertEqual(ref, ret)
Example #17
def main():
    (input_file, json_file) = sys.argv[1:3]

    # Run the segmenter on the input file.
    # The result is a list of tuples, each containing:
    # * a label in {'Male', 'Female', 'Music', 'NOACTIVITY'}
    # * the start time of the segment
    # * the end time of the segment
    seg = Segmenter()
    segmentation = seg(input_file)

    # Convert the resulting list of tuples to an object for serialization
    seg_schema = convert_to_segmentation_schema(input_file, segmentation)

    # Serialize the json and write it to destination file
    write_output_json(seg_schema, json_file)
    sys.exit(0)
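convert_to_segmentation_schema and write_output_json are not shown in this excerpt; a minimal sketch of what such helpers might look like (the field names are assumptions, not the original schema):

import json

def convert_to_segmentation_schema(filename, segmentation):
    # map the (label, start, stop) tuples onto a serializable dict
    return {
        'media': filename,
        'segments': [{'label': label, 'start': start, 'end': stop}
                     for label, start, stop in segmentation],
    }

def write_output_json(seg_schema, json_file):
    with open(json_file, 'w') as f:
        json.dump(seg_schema, f, indent=2)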
Example #18
    def classify(self):
        self.seg = Segmenter()
        counter = 0
        for audioPath in self.media:
            startTime = int(round(time.time()))
            vid = audioPath.split("/")[-1]
            print("### {}/{} Processing {} ###".format(counter,
                                                       len(self.media), vid))
            tmp = self.seg(audioPath)
            tmp2 = str(tmp)
            self.segmentation.append(tmp)
            if ("Male" in tmp2 or "Female" in tmp2) and "Music" in tmp2:
                self.results.append("Mixed")
            elif "Music" in tmp2:
                self.results.append("Music")
            elif "Male" in tmp2 or "Female" in tmp2:
                self.results.append("Speech")

            endTime = int(round(time.time()))
            self.times.append(endTime - startTime)
            counter += 1
Example #19
    def classify(self):
        if self.algo == "ina":
            self.seg = Segmenter()

        counter = 0
        for audioPath in self.media:
            startTime = int(round(time.time()))
            vid = audioPath.split("/")[-1]
            print("### {}/{} Processing {} ###".format(counter,
                                                       len(self.media), vid))
            if self.algo == "ina":
                tmp = self.seg(audioPath)
                tmp2 = str(tmp)
                self.segmentation.append(tmp)
                if ("Male" in tmp2 or "Female" in tmp2) and "Music" in tmp2:
                    self.results.append("Mixed")
                elif "Music" in tmp2:
                    self.results.append("Music")
                elif "Male" in tmp2 or "Female" in tmp2:
                    self.results.append("Speech")

            elif self.algo == "paa":
                [flagsInd, classesAll, acc,
                 CM] = aS.mtFileClassification(audioPath, "svmSM/svmSM", "svm",
                                               False, '')
                res = np.array(flagsInd).mean()
                if res <= 0.1:
                    self.results.append("Speech")
                elif res >= 0.9:
                    self.results.append("Music")
                else:
                    self.results.append("Mixed")

            endTime = int(round(time.time()))
            self.times.append(endTime - startTime)
            counter += 1
Example #20
class AudioDetect:
    def __init__(self, model_path_1, model_path_2):
        self.spleeter = Separator('spleeter:2stems', model_path_1)
        # Frequency-domain source separation; isolating vocals generally needs only 2 stems: accompaniment.wav is the extracted background/accompaniment, vocals.wav the extracted vocals
        self.spleeter._get_predictor()

        self.ina_speech_segmenter = Segmenter(detect_gender=False,
                                              model_dir=model_path_2)
        logging.info("init done")

    def file_base_name(self, file_path):
        return Path(file_path).resolve().stem

    def spleeter_volcals_file_name(self, input_file, output_dir):
        input_base_name = self.file_base_name(input_file)
        return output_dir + "/" + input_base_name + "/vocals.wav"  # path of the vocals stem written by spleeter

    def do_spleeter_from_buffer(self, input_buffer):
        waveform = buffer_utils.buffer_to_wave_for_spleeter(
            input_buffer, 44100)
        sources = self.spleeter.separate(waveform)
        return sources['vocals']

    def do_spleeter(self, input_file, out_dir):  # out_dir receives the separated stems
        self.spleeter.separate_to_file(
            input_file,
            out_dir,
            filename_format='{filename}/{instrument}.{codec}')
        return True

    def do_segment_from_buffer(self, input_buffer):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            mspec, loge, difflen = buffer_utils.feat_from_spleeter_vocals_for_segment_two_transcode(
                input_buffer)
            segmention = self.ina_speech_segmenter.segment_feats(
                mspec, loge, difflen, 0)
        return (True, segmention)

    def do_segment(self, input, output_dir):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            segmention = self.ina_speech_segmenter(
                self.spleeter_volcals_file_name(input, output_dir))

        return (True, segmention)

    def process_segmention(self, result_dic, segmention):
        last_lable = ""
        last_start = -1
        last_end = -1
        segments = []
        for segment in segmention:
            label = segment[0]
            label = self.map_label(label)
            start = round(float(segment[1]), 2)
            end = round(float(segment[2]), 2)
            if last_lable == "":
                last_lable = label
                last_start = start
                last_end = end
                continue
            if last_lable == label:
                last_end = end
                continue
            else:
                if last_lable == "speech":
                    segments.append({
                        "type": "speech",
                        "startSec": last_start,
                        "endSec": last_end
                    })
                last_lable = label
                last_start = start
                last_end = end

        if last_lable == "speech":
            segments.append({
                "type": "speech",
                "startSec": last_start,
                "endSec": last_end
            })
        result_dic["segments"] = segments

    def map_label(self, label):
        speech_labels = ["music", "speech"]
        if label in speech_labels:
            return "speech"
        return "noEnergy"

    def process_from_buffer(self, input_buffer, input_file):
        result_dic = {}
        input_base_name = os.path.basename(input_file)
        result_dic["fileName"] = input_base_name

        vocals_data = self.do_spleeter_from_buffer(input_buffer)
        if vocals_data is None:
            logging.error("separate failed")
            return json.dumps(result_dic, ensure_ascii=False)

        result, segmention = self.do_segment_from_buffer(
            vocals_data)  # make sure vocals_data is 16kHz
        if not result:
            logging.error("segment failed")
            return json.dumps(result_dic, ensure_ascii=False)

        self.process_segmention(result_dic, segmention)
        return json.dumps(result_dic, ensure_ascii=False)

    def process(self, input, output):
        result_dic = {}
        input_base_name = os.path.basename(input)
        result_dic["fileName"] = input_base_name

        if not self.do_spleeter(input, output):  # step 1: source separation
            logging.error("separate failed")
            return json.dumps(result_dic, ensure_ascii=False)

        result, segmention = self.do_segment(input, output)  # step 2: segmentation
        if not result:
            logging.error("segment failed")
            return json.dumps(result_dic, ensure_ascii=False)

        self.process_segmention(result_dic, segmention)
        return json.dumps(result_dic, ensure_ascii=False)
Example #21
 def __init__(self):
     print("\ncnn_segs init...")
     self.seg = Segmenter()
Example #22
parser.add_argument(
    '-g', '--detect_gender', choices=['true', 'false'], default='true',  # assumed reconstruction: the scraped snippet began mid-call
    help=
    "(default: 'true'). If set to 'true', segments detected as speech will be split into 'male' and 'female' segments. If set to 'false', segments corresponding to speech will be labelled as 'speech' (faster)"
)
args = parser.parse_args()

# Preprocess arguments and check their consistency
input_files = []
for e in args.input:
    input_files += glob.glob(e)
assert len(input_files) > 0, \
    'No existing media selected for analysis! Bad values provided to -i (%s)' % args.input

odir = args.output_directory
assert os.access(odir, os.W_OK), 'Directory %s is not writable!' % odir

# Do the processing
from inaSpeechSegmenter import Segmenter, seg2csv

# load the neural network into memory; this may take a few seconds
detect_gender = bool(distutils.util.strtobool(args.detect_gender))
seg = Segmenter(vad_engine=args.vad_engine, detect_gender=detect_gender)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    for i, e in enumerate(input_files):
        print('processing file %d/%d: %s' % (i + 1, len(input_files), e))
        base, _ = os.path.splitext(os.path.basename(e))
        seg2csv(seg(e), '%s/%s.csv' % (odir, base))
Example #23
###--- The utils directory lives one level above the datasets directory ---###
#sys.path.append('../utils')
import vad_utils

if __name__ == '__main__':
    """---Get all wavdata path---"""
    args = vad_utils.parse_args()
    path_input = vad_utils.get_path(args.input_dir)
    path_output = vad_utils.get_path(args.output_dir)
    clean_test_wav = vad_utils.get_wav_data(path_input)
    """---Generate de-silence data---"""
    path_index = 0
    for i in clean_test_wav:
        #sr, input_data = wav.read(i)
        seg = Segmenter(vad_engine='smn', detect_gender=False)
        segmentation = seg(i)

        speech_segment_index = 0
        for segment in segmentation:
            segment_label = segment[0]

            if segment_label == 'speech':
                # Convert segment start/end times from seconds to milliseconds
                start_time = segment[1] * 1000
                end_time = segment[2] * 1000

                # Write the segment out as a wav file
                # (the scraped snippet is truncated here; assumed completion)
                newAudio = AudioSegment.from_wav(i)
                newAudio = newAudio[start_time:end_time]
                newAudio.export(path_output + "/segment" + str(path_index) + ".wav", format="wav")
                path_index += 1
Example #24
 def test_praat_export(self):
     seg = Segmenter()
     with tempfile.TemporaryDirectory() as tmpdirname:
         lout = [os.path.join(tmpdirname, '1.TextGrid')]
         ret = seg.batch_process(['./media/musanmix.mp3'], lout, output_format='textgrid')
         self.assertTrue(filecmp.cmp(lout[0], './media/musanmix-smn-gender.TextGrid'))       
Example #25
import Pyro4
import sys
import os
import socket

from inaSpeechSegmenter import Segmenter

if __name__ == '__main__':
    dname = os.path.dirname(os.path.realpath(__file__))

    hostname = socket.gethostname()

    uri = sys.argv[1]
    jobserver = Pyro4.Proxy(uri)

    ret = -1
    outname = 'init'

    # batch size set to 1024; use lower values on small GPUs
    g = Segmenter(batch_size=1024)

    while True:
        lsrc, ldst = jobserver.get_njobs('%s %s' % (hostname, ret))

        print(lsrc, ldst)
        if len(lsrc) == 0:
            print('job list finished')
            break

        ret = g.batch_process(lsrc, ldst, skipifexist=True, nbtry=3)
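For context, a minimal sketch of the job-server side this worker talks to (the class name, chunking, and daemon setup are assumptions; only the get_njobs signature is taken from the call above):

import Pyro4

@Pyro4.expose
class JobServer:
    # hypothetical server handing out (source, destination) file pairs in chunks
    def __init__(self, lsrc, ldst, chunk=10):
        self.jobs = list(zip(lsrc, ldst))
        self.chunk = chunk

    def get_njobs(self, worker_info):
        # worker_info is the '<hostname> <last return code>' string sent by workers
        batch, self.jobs = self.jobs[:self.chunk], self.jobs[self.chunk:]
        return [s for s, _ in batch], [d for _, d in batch]

daemon = Pyro4.Daemon()
uri = daemon.register(JobServer(['a.mp3'], ['a.csv']))
print(uri)  # pass this URI to the worker as sys.argv[1]
daemon.requestLoop()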
Example #26
 def test_short(self):
     seg = Segmenter(vad_engine='sm')
     ret = seg('./media/0021.mp3')
     ref = [('male', 0, 0.66)]
     self.assertEqual(ref, ret)
Example #27
 def test_execution(self):
     # if this test fails, then you should check to correctness of your
     # tensorflow installation
     seg = Segmenter()
     ret = seg('./media/musanmix.mp3')
Example #28
def recognize(audioFile):
    gender = []
    seg = Segmenter()
    segmentation = seg(audioFile)
    # collect the labels of speech segments; without this loop the list
    # stays empty and max() raises ValueError
    for i in segmentation:
        if 'noEnergy' not in i[0]:
            gender.append(i[0])
    return max(set(gender), key=gender.count)
Example #29
import moviepy.editor as mp
from ddsp.colab.colab_utils import upload

filenames, audios = upload()
tuple_file = filenames, audios[0][0]
video = tuple_file[0][0]
clip = mp.VideoFileClip("/content/" + video).subclip(0, 20)
clip.audio.write_audiofile("audio.mp3")

"""##**Step 3: compute percentage**"""

#@title Click here to calculate the percentage of female/male voice speech (this may take a while).

from inaSpeechSegmenter import Segmenter, seg2csv
media = 'audio.mp3'
seg = Segmenter()
segmentation = seg(media)

female = 0
male = 0

for i in segmentation:
    duration = i[2] - i[1]
    if i[0] == "female":
        female += duration
    elif i[0] == "male":
        male += duration
total_speech = female + male

def percentage(part, whole):
    return 100 * float(part) / float(whole)
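The excerpt ends before percentage is used; a minimal follow-up (assuming total_speech is non-zero) would be:

# report the share of female and male voice in the detected speech
print("female: %.1f%%" % percentage(female, total_speech))
print("male: %.1f%%" % percentage(male, total_speech))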
Example #30
 def test_init(self):
     seg = Segmenter()