def predict(config, audio_path: str, model) -> None:
    """
    Predict the emotion of one audio file and report the result.

    Features are extracted with the extractor selected by
    ``config.feature_method``, fed to ``model``, and the recognised label and
    class probabilities are printed. A radar chart of the probabilities is
    then drawn with ``utils.radar``.

    Args:
        config: configuration items; this function reads ``feature_method``,
            the matching ``predict_feature_path_*`` entry and ``class_labels``.
        audio_path (str): path of the audio file to predict.
        model: loaded model exposing ``predict`` and ``predict_proba``.

    Raises:
        ValueError: if ``config.feature_method`` is neither ``'o'`` nor ``'l'``.
    """
    # utils.play_audio(audio_path)

    if config.feature_method == 'o':
        # Temporary workaround for an unexplained bug: write the features to
        # disk first and load them back instead of using the return value.
        of.get_data(config, audio_path, config.predict_feature_path_opensmile, train=False)
        test_feature = of.load_feature(config, config.predict_feature_path_opensmile, train=False)
    elif config.feature_method == 'l':
        test_feature = lf.get_data(config, audio_path, config.predict_feature_path_librosa, train=False)
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            f"Unsupported feature_method {config.feature_method!r}; expected 'o' or 'l'"
        )

    result = model.predict(test_feature)
    result_prob = model.predict_proba(test_feature)
    print('Recogntion: ', config.class_labels[int(result)])
    print('Probability: ', result_prob)
    utils.radar(result_prob, config.class_labels)
def predict(config) -> None:
    """
    Predict the emotion of the audio file named by ``config.audio_path``.

    Extracts features, rebuilds the network selected by ``config.model``,
    loads its weights from ``config.checkpoint_path/config.checkpoint_name``,
    prints the recognised label and draws a radar chart of the softmax
    probabilities with ``utils.radar``.

    Args:
        config: configuration items; this function reads ``audio_path``,
            ``feature_method``, the matching ``predict_feature_path_*`` entry,
            ``model``, the model hyper-parameters, ``class_labels``,
            ``checkpoint_path`` and ``checkpoint_name``.

    Raises:
        ValueError: if ``config.feature_method`` is neither ``"o"`` nor ``"l"``.
    """
    # utils.play_audio(audio_path)

    if config.feature_method == "o":
        of.get_data(
            config,
            config.audio_path,
            config.predict_feature_path_opensmile,
            train=False,
        )
        test_feature = of.load_feature(
            config, config.predict_feature_path_opensmile, train=False
        )
    elif config.feature_method == "l":
        test_feature = lf.get_data(
            config, config.audio_path, config.predict_feature_path_librosa, train=False
        )
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            f"Unsupported feature_method {config.feature_method!r}; expected 'o' or 'l'"
        )

    # Add a leading axis of size 1: (d0, d1) -> (1, d0, d1).
    test_feature = test_feature.reshape(1, test_feature.shape[0], test_feature.shape[1])
    test_feature = flow.tensor(test_feature, dtype=flow.float32, device="cuda")
    n_feats = test_feature.shape[2]

    if config.model == "lstm":
        # NOTE(review): the last argument looks like the batch size (1 for a
        # single clip) - confirm against lstm_ser.
        model = lstm_ser(n_feats, config.rnn_size, len(config.class_labels), 1)
    else:
        model = cnn1d_ser(
            1, config.n_kernels, n_feats, config.hidden_size, len(config.class_labels)
        )
    model.to("cuda")

    model_path = os.path.join(config.checkpoint_path, config.checkpoint_name)
    model.load_state_dict(flow.load(model_path))
    # Inference only: switch to evaluation mode before the forward pass.
    model.eval()

    # no_grad must be entered as a context manager; calling it as a bare
    # statement does not disable gradient tracking.
    with flow.no_grad():
        logits = model(test_feature)
        result_prob = flow.softmax(logits, dim=1)

    result = np.argmax(logits.numpy())
    print("Recognition:", config.class_labels[int(result)])
    utils.radar(result_prob.numpy().squeeze(), config.class_labels)
def train(model_name: str, save_model_name: str, feature_method: str = 'o'):
    """
    Train, evaluate and save a model on the preprocessed training features.

    Args:
        model_name (str): one of ``'svm'``, ``'mlp'`` or ``'lstm'``.
        save_model_name (str): name passed to ``model.save_model``.
        feature_method (str): ``'o'`` to load the features through ``of``,
            ``'l'`` to load them through ``lf``.

    Raises:
        ValueError: if ``feature_method`` or ``model_name`` is not one of the
            supported values.
    """
    # Validate both selectors up front so a typo fails immediately, before any
    # feature loading, rather than as an UnboundLocalError later on.
    if feature_method not in ('o', 'l'):
        raise ValueError(
            f"Unsupported feature_method {feature_method!r}; expected 'o' or 'l'"
        )
    if model_name not in ('svm', 'mlp', 'lstm'):
        raise ValueError(
            f"Unsupported model_name {model_name!r}; expected 'svm', 'mlp' or 'lstm'"
        )

    # Load the features preprocessed by preprocess.py.
    if feature_method == 'o':
        x_train, x_test, y_train, y_test = of.load_feature(
            feature_path=config.TRAIN_FEATURE_PATH_OPENSMILE, train=True)
    else:
        x_train, x_test, y_train, y_test = lf.load_feature(
            feature_path=config.TRAIN_FEATURE_PATH_LIBROSA, train=True)

    # Build the model.
    if model_name == 'svm':
        model = SVM_Model()
    elif model_name == 'mlp':
        model = MLP_Model()
    else:
        # One-hot encode the labels for the LSTM; y_test itself is left
        # untouched because it is passed to evaluate() below.
        y_train = np_utils.to_categorical(y_train)
        y_val = np_utils.to_categorical(y_test)
        model = LSTM_Model(input_shape=x_train.shape[1],
                           num_classes=len(config.CLASS_LABELS))
        # 2-D array to 3-D (samples, time_steps, input_dim).
        x_train = np.reshape(x_train, (x_train.shape[0], 1, x_train.shape[1]))
        x_test = np.reshape(x_test, (x_test.shape[0], 1, x_test.shape[1]))

    # Train the model.
    print(
        '---------------------------- Start Training ----------------------------'
    )
    if model_name == 'lstm':
        model.train(x_train, y_train, x_test, y_val, n_epochs=config.epochs)
    else:
        model.train(x_train, y_train)
    print(
        '------------------------------ End Training ------------------------------'
    )

    # Evaluate the model.
    print(x_test.shape, y_test.shape)
    print(y_test)
    model.evaluate(x_test, y_test)

    # Save the trained model.
    print('saving model at ' + save_model_name)
    model.save_model(save_model_name)
    print("Donezo")
def train(config) -> None:
    """
    Train a model, evaluate it on the test split and save it.

    Nothing is returned; the trained model is written out with
    ``model.save(config.checkpoint_path, config.checkpoint_name)``.

    Args:
        config: configuration items; this function reads ``feature_method``,
            the matching ``train_feature_path_*`` entry, ``model``,
            ``batch_size``, ``epochs``, ``checkpoint_path`` and
            ``checkpoint_name``.

    Raises:
        ValueError: if ``config.feature_method`` is neither ``'o'`` nor ``'l'``.
    """
    # Load the features preprocessed by preprocess.py.
    if config.feature_method == 'o':
        x_train, x_test, y_train, y_test = of.load_feature(
            config, config.train_feature_path_opensmile, train=True)
    elif config.feature_method == 'l':
        x_train, x_test, y_train, y_test = lf.load_feature(
            config, config.train_feature_path_librosa, train=True)
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            f"Unsupported feature_method {config.feature_method!r}; expected 'o' or 'l'"
        )

    # x_train, x_test: (n_samples, n_feats)
    # y_train, y_test: (n_samples)

    # Build the model.
    model = models.make(config=config, n_feats=x_train.shape[1])

    # Train the model.
    print('----- start training', config.model, '-----')
    if config.model in ['lstm', 'cnn1d', 'cnn2d']:
        # One-hot encode the labels for these models; y_test itself is left
        # untouched because it is passed to evaluate() below.
        y_train = np_utils.to_categorical(y_train)
        y_val = np_utils.to_categorical(y_test)
        model.train(
            x_train, y_train,
            x_test, y_val,
            batch_size=config.batch_size,
            n_epochs=config.epochs,
        )
    else:
        model.train(x_train, y_train)
    print('----- end training ', config.model, ' -----')

    # Evaluate the model.
    model.evaluate(x_test, y_test)
    # Save the trained model.
    model.save(config.checkpoint_path, config.checkpoint_name)
def predict(config, audio_path, model):
    """
    Predict the emotion of one audio file and report the result.

    Features are extracted with the extractor selected by
    ``config.feature_method``, reshaped for ``config.model`` by
    ``reshape_input`` and fed to ``model``. The recognised label and the class
    probabilities are printed and drawn with ``Radar``.

    Args:
        config: configuration items; this function reads ``feature_method``,
            the matching ``predict_feature_path_*`` entry, ``model`` and
            ``class_labels``.
        audio_path: path of the audio file to predict.
        model: loaded model exposing ``predict`` and ``predict_proba``.

    Raises:
        ValueError: if ``config.feature_method`` is neither ``'o'`` nor ``'l'``.
    """
    # play_audio(audio_path)

    if config.feature_method == 'o':
        # Temporary workaround for an unexplained bug: write the features to
        # disk first and load them back instead of using the return value.
        of.get_data(config, audio_path, config.predict_feature_path_opensmile, train=False)
        test_feature = of.load_feature(config, config.predict_feature_path_opensmile, train=False)
    elif config.feature_method == 'l':
        test_feature = lf.get_data(config, audio_path, config.predict_feature_path_librosa, train=False)
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            f"Unsupported feature_method {config.feature_method!r}; expected 'o' or 'l'"
        )

    test_feature = reshape_input(config.model, test_feature)

    result = model.predict(test_feature)
    if config.model in ['lstm', 'cnn1d', 'cnn2d']:
        # For these models predict() output is reduced to a class index.
        result = np.argmax(result)

    result_prob = model.predict_proba(test_feature)[0]
    print('Recogntion: ', config.class_labels[int(result)])
    print('Probability: ', result_prob)
    Radar(result_prob, config.class_labels)
def train(config):
    """
    Load the preprocessed training features and hand them to ``train_model``.

    Args:
        config: configuration items; this function reads ``feature_method``,
            the matching ``train_feature_path_*`` entry, ``model``,
            ``batch_size`` and ``epochs``.

    Raises:
        ValueError: if ``config.feature_method`` is neither ``'o'`` nor ``'l'``.
    """
    # Load the features preprocessed by preprocess.py.
    if config.feature_method == 'o':
        x_train, x_test, y_train, y_test = of.load_feature(
            config, config.train_feature_path_opensmile, train=True)
    elif config.feature_method == 'l':
        x_train, x_test, y_train, y_test = lf.load_feature(
            config, config.train_feature_path_librosa, train=True)
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            f"Unsupported feature_method {config.feature_method!r}; expected 'o' or 'l'"
        )

    # x_train, x_test: (n_samples, n_feats)
    # y_train, y_test: (n_samples)

    # Train the model.
    print('----- start training', config.model, '-----')
    if config.model in ['lstm', 'cnn1d', 'cnn2d']:
        # One-hot encode the labels for these models.
        y_train = np_utils.to_categorical(y_train)
        y_val = np_utils.to_categorical(y_test)
        train_model(config, x_train, y_train, x_test, y_val,
                    config.batch_size, config.epochs)
    else:
        # NOTE(review): unlike the call above, this one does not pass
        # `config` as the first argument - confirm against train_model's
        # signature before relying on this branch.
        train_model(x_train, y_train)
    print('----- end training ', config.model, ' -----')
def predict(model, model_name: str, file_path: str, feature_method: str = 'o'):
    """
    Play an audio file, predict its emotion and report the result.

    Args:
        model: loaded model exposing ``predict`` and ``predict_proba``.
        model_name (str): name of the model; ``'lstm'`` triggers the 3-D
            reshape of the features and the argmax over the prediction.
        file_path (str): path of the audio file, resolved against the
            directory containing this source file. An absolute path is used
            as-is.
        feature_method (str): ``'o'`` to extract the features through ``of``,
            ``'l'`` to extract them through ``lf``.

    Raises:
        ValueError: if ``feature_method`` is neither ``'o'`` nor ``'l'``.
    """
    # Reject a bad selector before any audio is played.
    if feature_method not in ('o', 'l'):
        raise ValueError(
            f"Unsupported feature_method {feature_method!r}; expected 'o' or 'l'"
        )

    # Build the path with os.path.join rather than concatenating with '/'.
    file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), file_path)
    play_audio(file_path)

    if feature_method == 'o':
        # Temporary workaround for an unexplained bug: write the features to
        # disk first and load them back instead of using the return value.
        of.get_data(file_path, config.PREDICT_FEATURE_PATH_OPENSMILE, train=False)
        test_feature = of.load_feature(config.PREDICT_FEATURE_PATH_OPENSMILE, train=False)
    else:
        test_feature = lf.get_data(file_path, config.PREDICT_FEATURE_PATH_LIBROSA, train=False)

    is_lstm = model_name == 'lstm'
    if is_lstm:
        # 2-D array to 3-D (samples, time_steps, input_dim).
        test_feature = np.reshape(
            test_feature, (test_feature.shape[0], 1, test_feature.shape[1]))

    result = model.predict(test_feature)
    if is_lstm:
        # For the LSTM, predict() output is reduced to a class index.
        result = np.argmax(result)

    result_prob = model.predict_proba(test_feature)[0]
    print('Recogntion: ', config.CLASS_LABELS[int(result)])
    print('Probability: ', result_prob)
    Radar(result_prob)
def train_eval(config):
    """
    Train the selected network and evaluate it on the test split every epoch.

    The trained weights are saved to
    ``config.checkpoint_path/config.checkpoint_name``; when ``config.vis`` is
    truthy the accuracy and loss curves are drawn with ``curve``.

    Args:
        config: configuration items; this function reads ``feature_method``,
            the matching ``train_feature_path_*`` entry, ``model``, the model
            hyper-parameters, ``class_labels``, ``batch_size``, ``lr``,
            ``epochs``, ``checkpoint_path``, ``checkpoint_name`` and ``vis``.

    Returns:
        A tuple ``(train_loss, test_loss, train_acc, test_acc)`` of per-epoch
        lists. Losses are the ``.numpy()`` value of the mean batch loss,
        accuracies are percentages.

    Raises:
        ValueError: if ``config.feature_method`` is neither ``"o"`` nor ``"l"``.
    """
    # Load the features preprocessed by preprocess.py.
    if config.feature_method == "o":
        x_train, x_test, y_train, y_test = of.load_feature(
            config, config.train_feature_path_opensmile, train=True)
    elif config.feature_method == "l":
        x_train, x_test, y_train, y_test = lf.load_feature(
            config, config.train_feature_path_librosa, train=True)
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            f"Unsupported feature_method {config.feature_method!r}; expected 'o' or 'l'"
        )

    n_feats = x_train.shape[1]
    y_train = np.array(y_train)
    y_test = np.array(y_test)

    train_dataset = SpeechDataset(x_train, y_train)
    test_dataset = SpeechDataset(x_test, y_test)
    train_iter = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
    test_iter = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=True)

    if config.model == "lstm":
        model = lstm_ser(n_feats, config.rnn_size, len(config.class_labels), config.batch_size)
    else:
        model = cnn1d_ser(1, config.n_kernels, n_feats, config.hidden_size, len(config.class_labels))

    loss_fn = nn.CrossEntropyLoss()
    model.to("cuda")
    loss_fn.to("cuda")
    optimizer = flow.optim.Adam(model.parameters(), lr=config.lr)

    def train(loader, model, loss_fn, optimizer):
        """Run one training epoch; return (mean batch loss, accuracy in %)."""
        size = len(loader.dataset)
        num_batches = len(loader)
        # test() leaves the model in eval mode, so training mode must be
        # restored at the start of every epoch.
        model.train()
        running_loss, correct = 0, 0
        for batch, (x, y) in enumerate(loader):
            # Add a leading axis of size 1: (batch, feats) -> (1, batch, feats).
            x = x.reshape(1, x.shape[0], x.shape[1])
            x = flow.tensor(x, dtype=flow.float32, device="cuda")
            y = flow.tensor(y, dtype=flow.int32, device="cuda")

            # Compute prediction error
            pred = model(x)
            loss = loss_fn(pred, y)
            bool_value = np.argmax(pred.numpy(), 1) == y.numpy()
            correct += float(bool_value.sum())
            # Accumulate a detached value so the running total does not keep
            # every batch's autograd history alive.
            running_loss += loss.detach()

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            current = batch * config.batch_size
            if batch % 15 == 0:
                print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
        return running_loss / num_batches, 100 * correct / size

    def test(loader, model, loss_fn):
        """Evaluate on ``loader``; return (mean batch loss, accuracy in %)."""
        size = len(loader.dataset)
        num_batches = len(loader)
        model.eval()
        test_loss, correct = 0, 0
        with flow.no_grad():
            for x, y in loader:
                # Number of genuine samples in this batch.
                n_real = x.shape[0]
                if n_real != config.batch_size:
                    # Zero-pad a short batch up to batch_size rows before the
                    # forward pass; the padded rows are discarded again below.
                    n_pad = config.batch_size - n_real
                    x_comp = flow.zeros((n_pad, x.shape[1]))
                    x = flow.tensor(np.vstack((x.numpy(), x_comp.numpy())))
                x = x.reshape(1, x.shape[0], x.shape[1])
                x = flow.tensor(x, dtype=flow.float32, device="cuda")
                y = flow.tensor(y, dtype=flow.int32, device="cuda")

                # Keep only the rows that belong to real samples, so padding
                # affects neither the loss nor the accuracy and no batch size
                # has to be hard-coded.
                pred = model(x)[:n_real]
                test_loss += loss_fn(pred, y)
                bool_value = np.argmax(pred.numpy(), 1) == y.numpy()
                correct += float(bool_value.sum())
        test_loss /= num_batches
        print("test_loss", test_loss, "num_batches ", num_batches)
        correct /= size
        print(
            f"Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f}"
        )
        return test_loss, 100 * correct

    train_loss, train_acc, test_loss, test_acc = [], [], [], []
    for e in range(config.epochs):
        print(f"Epoch {e + 1}\n-------------------------------")
        tr_loss, tr_acc = train(train_iter, model, loss_fn, optimizer)
        train_loss.append(tr_loss.numpy())
        train_acc.append(tr_acc)
        te_loss, te_acc = test(test_iter, model, loss_fn)
        test_loss.append(te_loss.numpy())
        test_acc.append(te_acc)
    print("Done!")

    # Save the trained model, replacing any previous checkpoint whether it is
    # a directory or a plain file (rmtree alone fails on a file).
    model_path = os.path.join(config.checkpoint_path, config.checkpoint_name)
    if os.path.isdir(model_path):
        shutil.rmtree(model_path)
    elif os.path.exists(model_path):
        os.remove(model_path)
    flow.save(model.state_dict(), model_path)

    # Visualize the training process
    if config.vis:
        curve(train_acc, test_acc, "Accuracy", "acc")
        curve(train_loss, test_loss, "Loss", "loss")

    return train_loss, test_loss, train_acc, test_acc