Example #1
0
def vad():
    """Run the VAD shell command and verify that it produced its output.

    Logs and executes ``vad_sh`` through ``os.system`` and then checks that
    the path ``segment`` exists.  When the path is missing, the failure is
    logged and the process is terminated immediately with a non-zero exit
    status.
    """
    logger.info(vad_sh)
    os.system(vad_sh)

    if os.path.exists(segment):
        logger.info("vad succsee!")
        return

    logger.error("vad failed!")
    # Exit with status 1 rather than 0 so that wrapper scripts can tell the
    # run failed.
    os._exit(1)
Example #2
0
    def run(self):
        """Group per-utterance records into packages.

        Iterates over ``self.mklines()``, which yields
        ``(spk, wav_name, content)`` tuples, and looks up each audio duration
        in ``self.wav_time_map``.  Entries without a duration are logged and
        skipped.

        Grouping mode:
            * ``self.custom_classfy`` truthy: records are placed into
              consecutively numbered packages (0, 1, 2, ...) holding
              ``self.custom_classfy`` records each.
            * otherwise: records are grouped by ``spk``.

        Returns:
            tuple: ``(wavs_info_map, whole_sum)`` where ``wavs_info_map`` maps
            a package key to a list of record lists, and ``whole_sum`` is the
            number of distinct ``wav_name`` values that were packaged.
        """
        logger.debug("遍历wav_name,text信息")
        wav_suf = "wav"
        packed = 0  # number of records already assigned in fixed-size mode
        whole_wavs = set()
        wavs_info_map = {}

        # Resolve the label first: "%" binds tighter than the conditional
        # expression, so formatting inline would log only "SPK" by default.
        classify_label = self.custom_classfy if self.custom_classfy else "SPK"
        logger.info("按照 【%s】 进行分包!" % classify_label)

        # Walk speaker id, audio name and transcript of every entry.
        for spk, wav_name, content in self.mklines():

            wav_time = self.wav_time_map.get(wav_name)
            if wav_time is None:
                logger.warning("未获取到音频时间 [%s]" % wav_name)
                continue

            # Record layout: one segment spanning the whole audio file.
            wav_info = [
                {
                    "Wav_name": wav_name,
                    "Length_time": wav_time,
                    "Data": [{
                        "text": content,
                        "start_time": 0,
                        "end_time": wav_time,
                    }],
                    "Wav_suf": wav_suf,
                }
            ]

            whole_wavs.add(wav_name)

            if self.custom_classfy:
                # Fixed-size packages.  Integer division also handles a
                # package size of 1, which previously never advanced to the
                # next package.
                package_key = packed // self.custom_classfy
                packed += 1
            else:
                # Default mode: one package per speaker.
                package_key = spk

            wavs_info_map.setdefault(package_key, []).append(wav_info)

        return wavs_info_map, len(whole_wavs)
Example #3
0
def update_graph():
    """Run word segmentation and then the graph-update shell command.

    Two shell commands are logged and executed in order through
    ``os.system``: ``python3 <word_seg_py>`` followed by ``update_graph_sh``.
    """

    def _log_and_run(command):
        # Log the command line before handing it to the shell.
        logger.info(command)
        os.system(command)

    # Chinese word segmentation.
    _log_and_run(f"python3 {word_seg_py}")

    # Model update.
    _log_and_run(update_graph_sh)
Example #4
0
def main(wavspath, outputpath, value, tocut):
    """Check every wav/txt pair under ``wavspath`` and optionally cut the audio.

    For each ``*.wav`` file found by ``os.walk`` the sibling ``.txt`` file is
    read with ``read_txttime`` and rewritten in sorted order by ``sorttime``.
    The last timestamp (``read_tail_time``) is compared with the audio
    duration (``read_wavtime``).  Files for which ``judgement_time`` returns
    a falsy value are recorded as failed and are not cut.

    Args:
        wavspath: Directory tree to scan for wav files.
        outputpath: Root directory under which one sub-directory per wav
            file is created when cutting.
        value: Passed to ``judgement_time`` as ``threshold``.
        tocut: When truthy, each segment in the sorted list is cut out with
            ``wavcut`` in a pool of 8 worker processes.
    """
    wav_ext = '.wav'
    result_error = {}

    for root, _dirs, files in os.walk(wavspath):

        for file in files:
            if not file.endswith(wav_ext):
                continue

            # Remove exactly the extension.  str.rstrip('.wav') strips any
            # trailing '.', 'w', 'a' or 'v' characters and corrupts stems
            # such as "data_v.wav".
            package_name = file[:-len(wav_ext)]
            logger.info(f'处理{package_name}中.')

            wav_path = os.path.join(root, file)
            txt_path = os.path.join(root, package_name + '.txt')

            wavmaps = read_txttime(txt_path)

            wavslist = sorttime(wavmaps, txt_path)

            tail_time = read_tail_time(wavslist)
            wave_time = read_wavtime(wav_path)

            time_ = tail_time - wave_time
            diff_time = round(time_, 3)
            logger.info(f'时间点差为{diff_time}.')

            if not judgement_time(time_, threshold=value):
                result_error[package_name] = {'tail_time': tail_time, 'wave_time': wave_time}
                continue

            if tocut:
                # Use a per-file directory variable.  Reassigning
                # ``outputpath`` here made every later package nest inside
                # the previous package's directory.
                package_dir = mkdir(os.path.join(outputpath, package_name))
                pool = Pool(processes=8)

                logger.info(f'切割 {file} 中...')
                try:
                    for item in wavslist:
                        outputwav = os.path.join(package_dir, item.get('wavname'))
                        # The async results are not collected, so errors
                        # raised inside wavcut are not reported here.
                        pool.apply_async(
                            wavcut,
                            (wav_path, outputwav, item.get('start'), item.get('end')),
                        )
                finally:
                    pool.close()
                    pool.join()

    logger.info('检测完成!')

    if result_error:
        logger.error('%s 未通过检测,需手动调整切割 [%s]' % (list(result_error.keys()), result_error))

    else:
        logger.info('全部音频通过检测!')
Example #5
0
def sorttime(maps, txt_path):
    """Rewrite ``txt_path`` with its entries sorted by start time.

    All validation and line building happens before the file is opened for
    writing, so a missing header or a malformed timestamp raises without
    truncating the existing file.

    Args:
        maps: Dict holding a ``'header'`` string plus one entry per wav name
            whose value is a dict with ``'start'``, ``'end'`` and ``'text'``.
            The ``'header'`` key is removed from this dict.
        txt_path: Path of the text file to overwrite.

    Returns:
        list: One ``{'wavname', 'start', 'end'}`` dict per entry, ordered by
        start time, with ``start`` and ``end`` converted to ``float``.

    Raises:
        KeyError: If ``maps`` has no ``'header'`` entry.
        ValueError, TypeError: If a start or end value cannot be converted
            to ``float``.
    """
    header = maps.pop('header')
    ordered = sorted(maps.items(), key=lambda item: float(item[1].get('start')))

    wavinfo = []
    lines = [header]
    for wavname, values in ordered:
        start = values.get('start')
        end = values.get('end')
        text = values.get('text')
        wavinfo.append({'wavname': wavname, 'start': float(start), 'end': float(end)})
        lines.append(f"{wavname}\t[{start},{end}]\t{text}\n")

    # Only touch the file once every line is known to be valid.
    with open(txt_path, 'w', encoding='utf-8') as fw:
        fw.writelines(lines)

    logger.info('时间排序完成!')
    return wavinfo
Example #6
0
def proc(file_path, rate, channel, bit, target_dir, func):
    """Convert one audio file with ``sox`` and write it into ``target_dir``.

    The output keeps the base name of ``file_path``.  The command is logged
    and then executed through ``os.system``.

    Args:
        file_path: Source audio file.
        rate: Sample rate passed to sox as ``-r``.
        channel: Channel count passed to sox as ``-c``.
        bit: Bit depth passed to sox as ``-b``.
        target_dir: Directory that receives the converted file.
        func: ``"addhead"`` makes sox read the input as raw signed-integer
            data (``-t raw``) with the given format; any other value applies
            the format options to the output file instead.
    """
    import shlex  # local import: only this function needs shell quoting

    file_name = os.path.basename(file_path)
    target_path = os.path.join(target_dir, file_name)

    # Quote both paths so that spaces or shell metacharacters in file names
    # cannot break or alter the command.  Plain paths are left unchanged.
    src = shlex.quote(str(file_path))
    dst = shlex.quote(str(target_path))

    if func == "addhead":
        cmd = f"sox -t raw -c {channel} -e signed-integer -b {bit} -r {rate} {src} {dst}"
    else:
        cmd = f"sox {src} -r {rate} -b {bit} -c {channel} {dst}"

    logger.info(cmd)
    os.system(cmd)
Example #7
0
def run_decode():
    """Run VAD when needed, copy the segment directory and start decoding.

    VAD is skipped when ``output/<project_name>_seg/segments`` already
    exists.  The segment directory is then copied into ``data/`` and
    ``decode_sh`` is executed; both shell commands are logged first.
    """
    # VAD stage.
    segment_dir = f"output/{project_name}_seg"
    segments_file = os.path.join(segment_dir, "segments")
    if os.path.exists(segments_file):
        logger.info("sgements file has exist, vad pass!")
    else:
        vad()

    # Data preparation.
    copy_cmd = f"cp -r {segment_dir} data/"
    logger.info(copy_cmd)
    os.system(copy_cmd)

    # Decoding.
    logger.info(decode_sh)
    os.system(decode_sh)
    logger.info("####------Done!--------####")
Example #8
0
def run_decode():
    """Run VAD when needed, copy the segment directory, decode and report.

    VAD is skipped when the path ``segment`` already exists.  After copying
    ``segment_dir`` into ``data/`` and executing ``decode_sh``, the outcome
    is logged according to whether ``decode_file`` exists.
    """
    # VAD stage.
    if os.path.exists(segment):
        logger.info("sgements file has exist, vad pass!")
    else:
        vad()

    # Data preparation.
    copy_cmd = f"cp -r {segment_dir} data/"
    logger.info(copy_cmd)
    os.system(copy_cmd)

    # Decoding.
    logger.info(decode_sh)
    os.system(decode_sh)

    # The presence of the decode output decides success or failure.
    if not os.path.exists(decode_file):
        logger.error("####------解码失败!--------####")
        return
    logger.info("####------解码完成!--------####")
Example #9
0
def vad():
    """Log the VAD shell command ``vad_sh`` and execute it via ``os.system``."""
    command = vad_sh
    logger.info(command)
    os.system(command)
Example #10
0
    else:
        logger.info("sgements file has exist, vad pass!")

    # 数据准备
    cmd = f"cp -r {segment_dir} data/"
    logger.info(cmd)
    os.system(cmd)

    # 解码操作

    logger.info(decode_sh)
    os.system(decode_sh)
    logger.info("####------Done!--------####")


if __name__ == "__main__":

    # ``func`` selects a single stage; any other value runs the full
    # pipeline (graph update followed by decoding).
    if func not in ("update", "decode", "vad"):
        update_graph()
        run_decode()
        logger.info("DONE!")
    else:
        if func == "update":
            update_graph()
        elif func == "decode":
            run_decode()
        else:
            vad()
        # Single-stage runs end the process immediately.
        os._exit(0)
Example #11
0

if __name__ == "__main__":
    # Script entry point: validate the inputs, build or load the audio
    # duration map, create the packaged records and save them.

    file_exists(project_wavs_path)
    file_exists(project_txts_path)

    if not check_valid(project_txts_path):
        # Non-zero status: a bare sys.exit() would report success.
        sys.exit(1)

    # Build the duration map only when no cached file exists yet.
    if not os.path.exists(time_map):
        dic_map = wavtime_map(project_wavs_path)
        with open(time_map, "w", encoding="utf-8") as f:
            json.dump(dic_map, f)

        logger.info("保存timemap成功! 共%s条" % len(dic_map))

    if not os.path.exists(time_map):
        logger.info("time_map 未生成!!!")
        sys.exit(1)

    with open(time_map, "r", encoding="utf-8") as f:
        dic_map = json.load(f)

    # Log only the first six entries as a preview.
    logger.info("载入音频map完成! \n--> %s" %
                {key: dic_map[key]
                 for key in list(dic_map)[:6]})

    result_map, whsum = CreatPTFile(project_txts_path, dic_map, classify).run()

    save_to_file(save_upfile, result_map)