def main():
    """
    Expected directory layout under ``test_data_path`` in test_config.json (example):

        ——test_data_path
            ——人工  (every .wav file under this folder is read and labelled as
                    manual in-well construction / 人工井内施工)
                20180310253.wav
            ——2018.1.3
                20180103111.wav
            ——机械施工  (mechanical construction)
            ——放缆  (cable laying)
    """
    with open('.\\test_config.json', encoding='UTF-8') as file:
        test_config = json.load(file, strict=False)
    path = test_config['test_data_path']
    seg_param = test_config['seg_param']
    seg = seg_param['seg']          # segment length, 4 s
    nw = seg_param['nw']            # frame length, about 23 ms * 4
    n_mfcc = seg_param['n_mfcc']    # number of MFCC coefficients
    save_file = test_config['hmm_model']
    data_dict = load_audio(path, seg, nw, n_mfcc)
    # save_file = '..\\model'
    hmms_model = hmms()
    model_list = common.find_ext_files(save_file, ext='.npy')
    for model_file in model_list:
        hmms_model.load_one_model(model_file)  # load one model per .npy file
    # hmms_model.load_model(save_file)
    model_num = len(hmms_model.hmms)
    for key in data_dict:
        print('current audio folder under prediction: %s' % key)
        instance = data_dict[key]
        audio_num = instance.get_num()
        species_count = np.zeros(model_num)
        for j in range(audio_num):
            # print('\taudio file: %s' % instance.audio_name[j])
            frame_data = instance.frame[j]
            length = instance.frame_num[j]
            predicts = hmms_model.batch_predict(frame_data, length=length)
            count = np.bincount(predicts)
            # for i in range(len(count)):
            #     print('\t\t%s:%d' % (hmms_model.model_name[i], count[i]), end='\t')
            major = np.argmax(count)
            if hmms_model.model_name[major] == '背景音':
                # exclude the background-noise model ('背景音') from the majority vote
                count[major] = 0
                major = np.argmax(count)
            species_count[major] += 1
            # print('\n\t\tprediction: %s\n' % hmms_model.model_name[major])
        print('class recognition distribution:')
        species_count /= audio_num
        for i in range(model_num):
            print('\t%s: %f' % (hmms_model.model_name[i], species_count[i]))
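# A minimal sketch of the test_config.json that main() reads above. The key names
# mirror the lookups in main(); the concrete values are assumptions, shown for
# illustration only, and this dict is not used anywhere in the module.
_EXAMPLE_TEST_CONFIG = {
    "test_data_path": "..\\test_data",   # one subfolder per class, containing .wav files
    "seg_param": {
        "seg": 4,                        # segment length in seconds
        "nw": 1024,                      # frame length in samples (~23 ms * 4)
        "n_mfcc": 32,                    # number of MFCC coefficients
    },
    "hmm_model": "..\\model",            # folder holding the trained *.npy HMMs
}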
def load_audio(path, seg, nw, n_mfcc):
    # seg = 4       # segment length, 2 s
    # nw = 1024     # frame length, about 23 ms * 4
    # n_mfcc = 32   # number of MFCC coefficients
    folder_list = common.list_files(path)
    data_dict = {}
    for folder in folder_list:
        species = folder.split('\\')[-1]
        wav_list = common.find_ext_files(folder, '.wav')
        instance = dataObejct()
        for wav in wav_list:
            wavname = wav.split('\\')[-1]
            wav_data, fs = common.read_wav_file(wav)
            # for multi-channel audio keep only the first channel
            if wav_data.ndim > 1:
                wav_data = wav_data[:, 0]
                wav_data = wav_data.T
            ref_value = 2 ** 12 - 1
            wav_data = wav_data / ref_value  # normalize the waveform amplitude
            filter_data = common.butter_worth_filter(wav_data, cutoff=1000, fs=fs,
                                                     btype='high', N=8)
            seg_mfcc, frame_num = enframe_and_feature_extract(
                filter_data, seg, nw, fs, n_mfcc)
            # # output shape is [n_mfcc, n_sample]
            # mfcc_data = librosa.feature.mfcc(y=filter_data, sr=fs, n_mfcc=n_mfcc, n_fft=nw, hop_length=inc)
            instance.append(audio_name=wavname, origin=filter_data,
                            frame=seg_mfcc, frame_num=frame_num)
        data_dict[species] = instance
    return data_dict
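# A minimal, hypothetical usage sketch for load_audio(); it is not called anywhere
# in this module. The path and parameter values are assumptions for illustration.
def _example_load_audio_usage():
    data_dict = load_audio('..\\test_data', seg=4, nw=1024, n_mfcc=32)
    for species, instance in data_dict.items():
        # each value is a dataObejct holding per-file MFCC frames and frame counts
        print(species, instance.get_num())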
def load_and_train(path, seg, nw, n_mfcc, save_path):
    """
    :param path: audio storage path
    :param nw: frame length
    :param n_mfcc: number of MFCC coefficients
    :return: hmm_models, the trained HMM container object
             test_list, list of instances ('frame': framed data,
                 'origin': original (filtered) audio data, 'frame_num': frame count)
    """
    hmm_models = hmms(n_iter=1000)
    list_folders = common.list_files(path)
    test_list = []
    for i in range(len(list_folders)):
        name = list_folders[i].split('\\')[-1]
        config_name = os.path.join(path, name + '.json')
        with open(config_name, encoding='UTF-8') as file:
            config = json.load(file)
        n_components = config['n_components']
        n_mixs = config['n_mixs']
        audio_num_for_train = config['audio num for train']
        # audio_num_for_train = 10
        list_wavs = common.find_ext_files(list_folders[i], ext='.wav')
        print('%d = %s num:%d' % (i, list_folders[i], len(list_wavs)))
        list_wavs = shuffle_list(list_wavs)
        instance = dataObejct()
        instance.set_name(name)
        debug = 0
        for wavname in list_wavs:
            if debug >= audio_num_for_train:
                break
            debug += 1
            wav_data, fs = common.read_wav_file(wavname)
            # for multi-channel audio keep only the first channel
            if wav_data.ndim > 1:
                wav_data = wav_data[:, 0]
                wav_data = wav_data.T
            ref_value = 2 ** 12 - 1
            wav_data = wav_data / ref_value  # normalize the waveform amplitude
            filter_data = common.butter_worth_filter(wav_data, cutoff=1000, fs=fs,
                                                     btype='high', N=8)
            seg_mfcc, frame_num = enframe_and_feature_extract(
                filter_data, seg, nw, fs, n_mfcc)
            # # output shape is [n_mfcc, n_sample]
            # mfcc_data = librosa.feature.mfcc(y=filter_data, sr=fs, n_mfcc=n_mfcc, n_fft=nw, hop_length=inc)
            instance.append(audio_name=wavname, origin=filter_data,
                            frame=seg_mfcc, frame_num=frame_num)
        split_ = int(instance.get_num() / 2)
        shuffled_instance = instance.shuffle()
        train_samples = shuffled_instance[split_:]
        # determine the maximum frame count of the training set, reserved for sample balancing
        train_samples.recompute_total()
        train_samples.set_name(name)
        frames = np.empty((0, n_mfcc))
        frames_num = []
        npy_name = name + '_' + str(n_mixs) + '_' + str(n_components) + '.npy'
        save_name = os.path.join(save_path, npy_name)
        if not os.path.exists(save_name):
            for j in range(len(train_samples.origin)):
                frame_data = train_samples.frame[j]
                frame_data = frame_data.reshape((-1, n_mfcc))
                frames = np.vstack((frames, frame_data))
                frames_num += train_samples.frame_num[j]
            if sum(frames_num) != frames.shape[0]:
                print('sum(frames_num) = ', sum(frames_num))
                print('total frames = ', frames.shape[0])
                raise ValueError('sum(frames_num) != frames.shape[0]')
            hmm_models.train_one(frames, frames_num, n_components, n_mixs, name)
        else:
            print('\tmodel %s already exists, loading it' % npy_name)
            hmm_models.load_one_model(save_name)
        test_samples = shuffled_instance[:split_]
        test_samples.set_name(name)
        test_list.append(test_samples)
    hmm_models.save_model(save_path)
    return hmm_models, test_list
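# load_and_train() expects one JSON file per class folder, named '<class name>.json'
# and stored directly under `path`. A minimal sketch of such a file follows; only the
# key names are taken from the code, the values are assumptions for illustration
# (the commented-out default above hints at audio_num_for_train around 10).
_EXAMPLE_CLASS_CONFIG = {
    "n_components": 5,          # passed to hmms.train_one (HMM state count, presumably)
    "n_mixs": 3,                # passed to hmms.train_one (mixture count, presumably)
    "audio num for train": 10,  # how many .wav files of this class to use for training
}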
def load_optical_data(path, seg, nw, n_mfcc):
    """
    :param path: audio storage path
    :param nw: frame length
    :param n_mfcc: number of MFCC coefficients
    :return: train_list, list of instances ('frame': framed data,
                 'origin': original (filtered) audio data, 'frame_num': frame count)
             test_list, list of instances with the same fields
    """
    list_folders = common.list_files(path)
    train_list = []
    test_list = []
    for i in range(len(list_folders)):
        name = list_folders[i].split('\\')[-1]
        list_wavs = common.find_ext_files(list_folders[i], ext='.wav')
        print('%d = %s num:%d' % (i, list_folders[i], len(list_wavs)))
        list_wavs = shuffle_list(list_wavs)
        instance = dataObejct()
        instance.set_name(name)
        debug = 0
        for wavname in list_wavs:
            if debug >= 200:
                break
            debug += 1
            wav_data, fs = common.read_wav_file(wavname)
            # for multi-channel audio keep only the first channel
            if wav_data.ndim > 1:
                wav_data = wav_data[:, 0]
                wav_data = wav_data.T
            ref_value = 2 ** 12 - 1
            wav_data = wav_data / ref_value  # normalize the waveform amplitude
            filter_data = common.butter_worth_filter(wav_data, cutoff=1000, fs=fs,
                                                     btype='high', N=8)
            seg_mfcc, frame_num = enframe_and_feature_extract(
                filter_data, seg, nw, fs, n_mfcc)
            # # output shape is [n_mfcc, n_sample]
            # mfcc_data = librosa.feature.mfcc(y=filter_data, sr=fs, n_mfcc=n_mfcc, n_fft=nw, hop_length=inc)
            instance.append(audio_name=wavname, origin=filter_data,
                            frame=seg_mfcc, frame_num=frame_num)
        split_ = int(instance.get_num() / 2)
        shuffled_instance = instance.shuffle()
        train_samples = shuffled_instance[split_:]
        # determine the maximum frame count of the training set, reserved for sample balancing
        train_samples.recompute_total()
        train_samples.set_name(name)
        test_samples = shuffled_instance[:split_]
        test_samples.set_name(name)
        train_list.append(train_samples)
        test_list.append(test_samples)
    return train_list, test_list
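# A minimal, hypothetical usage sketch for load_optical_data(); it is not called
# anywhere in this module. It assumes the same layout as load_and_train(): one
# subfolder per class under `path`, each containing .wav files; every class is
# split 50/50 into training and test samples. Paths and parameters are assumptions.
def _example_load_optical_data_usage():
    train_list, test_list = load_optical_data('..\\train_data', seg=4, nw=1024, n_mfcc=32)
    for train_samples, test_samples in zip(train_list, test_list):
        print(train_samples.get_num(), 'training files /', test_samples.get_num(), 'test files')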
def Run():
    with open('sys_config.json', encoding='UTF-8') as file:
        data = json.load(file)
    src_path = data['src_path']
    dst_path = data['dst_path']
    hmm_param = data['hmm_param']
    hmm_model_path = data['hmm_model']
    seg = hmm_param['seg']          # HMM segment length
    nw = hmm_param['nw']            # frame length
    n_mfcc = hmm_param['n_mfcc']    # number of MFCC coefficients

    # load and initialize the HMM models
    print("loading hmm models")
    hmms = hmm_optical_sensing.hmms()
    model_list = common.find_ext_files(hmm_model_path, ext='.npy')
    for model_file in model_list:
        hmms.load_one_model(model_file)  # load one model per .npy file
    print("hmm models are loaded.")

    # create one folder per class name for the alarm audio
    print("building folders for alarm audio")
    for name in hmms.model_name:
        class_dir = os.path.join(dst_path, name)
        common.mkdir(class_dir)
    print("folders built.")

    # SQL Server initialization
    sql_param = data['sql_param']
    isconnect = int(sql_param['isconnect'])
    if isconnect == 1:
        host = sql_param['host']
        user = sql_param['user']
        pwd = sql_param['pwd']
        db = sql_param['db']
        table = sql_param['table']
        ms = MsSQL(host=host, user=user, pwd=pwd, db=db)
        print('isconnect==1! alarms will be inserted into the SQL server!')
    elif isconnect == 0:
        ms = None
        table = 'empty'
        print('isconnect==0! alarms will not be inserted into the SQL server!')
    else:
        raise ValueError('isconnect value error! it should be 0 or 1!')
    # ms.InsertAlarm(' ', ' ', ' ', ' ')

    print("Detecting", end='')
    count = 0
    while True:
        count += 1
        if count % 5 == 0:
            # print('\b\b\b\b', end='')
            # print(' ', end='')
            # print('\b\b\b\b', end='')
            print("\rDetecting", end='')
        else:
            print('.', end='')
        sys.stdout.flush()
        wav_list = common.find_ext_files(src_path, ext='.wav')
        time.sleep(1)
        for wav in wav_list:
            filename = wav.split('\\')[-1]
            try:
                predict_result = hmms.predict_wav(wav, seg=seg, nw=nw, n_mfcc=n_mfcc)
            except Exception as e:
                print('\rfile %s detection error %s' % (filename, e))
                continue
            result = hmms.model_name[predict_result]
            audio_save_path = os.path.join(dst_path, result, filename)
            shutil.move(wav, audio_save_path)
            # print("Event Occur: %s---%s" % (filename, result))
            print(("\rEvent Occur: %s---%s" % (filename, result)), end=' ')
            sys.stdout.flush()
            # write the alarm to the database
            if ms is not None:
                date = filename.split('.')[0]
                # ms.InsertAlarm(table, date=date, alarmtype=result, audio_save_path=audio_save_path)
                ms.UpdateAlarm(table, date=date, alarmtype=result,
                               audio_save_path=audio_save_path)
                print('alarm written to database')
            else:
                print('')
        # make sure the .wav files in the current list have been handled and are not processed twice
        for wav in wav_list:
            if os.path.exists(wav):
                os.remove(wav)
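# A minimal sketch of the sys_config.json that Run() reads above. The key names
# mirror the lookups in Run(); the concrete values are placeholders/assumptions
# and this dict is not used anywhere in the module.
_EXAMPLE_SYS_CONFIG = {
    "src_path": "..\\incoming_wav",   # folder watched for new .wav files
    "dst_path": "..\\alarm_audio",    # classified files are moved here, one subfolder per class
    "hmm_model": "..\\model",         # folder holding the trained *.npy HMMs
    "hmm_param": {"seg": 4, "nw": 1024, "n_mfcc": 32},
    "sql_param": {
        "isconnect": "0",             # "1" to write alarms to SQL Server, "0" to skip
        "host": "127.0.0.1",
        "user": "sa",
        "pwd": "password",
        "db": "alarm_db",
        "table": "alarm_table",
    },
}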