Esempio n. 1
0
def predict(config, audio_path: str, model) -> None:
    """
    Predict the emotion of one audio file and report the result.

    Extracts features from the audio with the configured feature method,
    runs the model on them, prints the predicted label and the class
    probabilities, and passes the probabilities to ``utils.radar``.

    Args:
        config: Configuration items. Reads ``feature_method``,
            ``predict_feature_path_opensmile`` or
            ``predict_feature_path_librosa``, and ``class_labels``.
        audio_path (str): Path of the audio file to classify.
        model: Loaded model exposing ``predict`` and ``predict_proba``.

    Raises:
        ValueError: If ``config.feature_method`` is neither ``'o'`` nor
            ``'l'``.
    """

    # utils.play_audio(audio_path)

    if config.feature_method == 'o':
        # Temporary workaround for an unexplained bug (kept from the
        # original code): run the extraction first, then load the features
        # from the prediction feature path in a separate call.
        of.get_data(config,
                    audio_path,
                    config.predict_feature_path_opensmile,
                    train=False)
        test_feature = of.load_feature(config,
                                       config.predict_feature_path_opensmile,
                                       train=False)
    elif config.feature_method == 'l':
        test_feature = lf.get_data(config,
                                   audio_path,
                                   config.predict_feature_path_librosa,
                                   train=False)
    else:
        # Without this branch test_feature would be unbound below and the
        # caller would only see an opaque UnboundLocalError.
        raise ValueError(
            "Unsupported feature_method %r; expected 'o' or 'l'" %
            (config.feature_method, ))

    result = model.predict(test_feature)
    result_prob = model.predict_proba(test_feature)
    print('Recogntion: ', config.class_labels[int(result)])
    print('Probability: ', result_prob)
    utils.radar(result_prob, config.class_labels)
Esempio n. 2
0
def predict(config) -> None:
    """
    Predict the emotion of the audio file named by ``config.audio_path``.

    Extracts features, rebuilds the network selected by ``config.model``,
    loads its weights from the checkpoint, prints the predicted label and
    passes the softmax probabilities to ``utils.radar``.

    Args:
        config: configuration items. Reads ``feature_method``,
            ``audio_path``, ``predict_feature_path_opensmile`` or
            ``predict_feature_path_librosa``, ``model``, ``rnn_size`` or
            ``n_kernels`` / ``hidden_size``, ``class_labels``,
            ``checkpoint_path`` and ``checkpoint_name``.

    Raises:
        ValueError: If ``config.feature_method`` is neither ``"o"`` nor
            ``"l"``.
    """

    # utils.play_audio(audio_path)
    if config.feature_method == "o":
        of.get_data(
            config,
            config.audio_path,
            config.predict_feature_path_opensmile,
            train=False,
        )
        test_feature = of.load_feature(config,
                                       config.predict_feature_path_opensmile,
                                       train=False)
    elif config.feature_method == "l":
        test_feature = lf.get_data(config,
                                   config.audio_path,
                                   config.predict_feature_path_librosa,
                                   train=False)
    else:
        # Otherwise test_feature would be unbound in the reshape below.
        raise ValueError(
            "Unsupported feature_method %r; expected 'o' or 'l'" %
            (config.feature_method, ))

    # Add a leading axis of size 1: (d0, d1) -> (1, d0, d1).
    test_feature = test_feature.reshape(1, test_feature.shape[0],
                                        test_feature.shape[1])
    test_feature = flow.tensor(test_feature, dtype=flow.float32, device="cuda")

    n_feats = test_feature.shape[2]
    n_classes = len(config.class_labels)

    if config.model == "lstm":
        model = lstm_ser(n_feats, config.rnn_size, n_classes, 1)
    else:
        model = cnn1d_ser(1, config.n_kernels, n_feats, config.hidden_size,
                          n_classes)
    model.to("cuda")

    model_path = os.path.join(config.checkpoint_path, config.checkpoint_name)
    model.load_state_dict(flow.load(model_path))
    # Inference only: switch layers such as dropout / batch norm to their
    # evaluation behaviour.
    model.eval()

    # A bare ``flow.no_grad()`` call only creates a context-manager object
    # and discards it; it has to be entered to actually disable autograd.
    with flow.no_grad():
        logits = model(test_feature)
        result_prob = flow.softmax(logits, dim=1)

    result = np.argmax(logits.numpy())
    print("Recognition:", config.class_labels[int(result)])

    utils.radar(result_prob.numpy().squeeze(), config.class_labels)
Esempio n. 3
0
def train(model_name: str, save_model_name: str, feature_method: str = 'o'):
    """
    Train, evaluate and save a model on the preprocessed training features.

    Reads the feature paths, the class labels and the epoch count from the
    module-level ``config``.

    Args:
        model_name (str): Which model to build: ``'svm'``, ``'mlp'`` or
            ``'lstm'``.
        save_model_name (str): Name passed to ``model.save_model``.
        feature_method (str): ``'o'`` to load the features through ``of``,
            ``'l'`` to load them through ``lf``.

    Raises:
        ValueError: If ``feature_method`` or ``model_name`` is not one of
            the supported values.
    """
    # Validate first: an unknown value would otherwise surface much later
    # as an unbound-variable error on x_train or model.
    if feature_method not in ('o', 'l'):
        raise ValueError(
            "Unsupported feature_method %r; expected 'o' or 'l'" %
            (feature_method, ))
    if model_name not in ('svm', 'mlp', 'lstm'):
        raise ValueError(
            "Unsupported model_name %r; expected 'svm', 'mlp' or 'lstm'" %
            (model_name, ))

    # Load the features prepared by preprocess.py
    if feature_method == 'o':
        x_train, x_test, y_train, y_test = of.load_feature(
            feature_path=config.TRAIN_FEATURE_PATH_OPENSMILE, train=True)
    else:
        x_train, x_test, y_train, y_test = lf.load_feature(
            feature_path=config.TRAIN_FEATURE_PATH_LIBROSA, train=True)

    # Build the model
    if model_name == 'svm':
        model = SVM_Model()
    elif model_name == 'mlp':
        model = MLP_Model()
    else:
        # One-hot labels are used for training only; evaluation below still
        # receives the original y_test.
        y_train = np_utils.to_categorical(y_train)
        y_val = np_utils.to_categorical(y_test)

        model = LSTM_Model(input_shape=x_train.shape[1],
                           num_classes=len(config.CLASS_LABELS))

        # 2-D arrays to 3-D (samples, time_steps, input_dim)
        x_train = np.reshape(x_train, (x_train.shape[0], 1, x_train.shape[1]))
        x_test = np.reshape(x_test, (x_test.shape[0], 1, x_test.shape[1]))

    # Train the model
    print(
        '---------------------------- Start Training ----------------------------'
    )
    if model_name == 'lstm':
        model.train(x_train, y_train, x_test, y_val, n_epochs=config.epochs)
    else:
        model.train(x_train, y_train)
    print(
        '------------------------------ End Training ------------------------------'
    )

    # Evaluate the model
    print(x_test.shape, y_test.shape)
    print(y_test)
    model.evaluate(x_test, y_test)
    # Save the trained model
    print('saving model at ' + save_model_name)
    model.save_model(save_model_name)
    print("Donezo")
def train(config) -> None:
    """
    Train, evaluate and save the model selected by ``config``.

    The function returns nothing; the trained model is written out with
    ``model.save(config.checkpoint_path, config.checkpoint_name)``.

    Args:
        config: Configuration items. Reads ``feature_method``,
            ``train_feature_path_opensmile`` or
            ``train_feature_path_librosa``, ``model``, ``batch_size``,
            ``epochs``, ``checkpoint_path`` and ``checkpoint_name``.

    Raises:
        ValueError: If ``config.feature_method`` is neither ``'o'`` nor
            ``'l'``.
    """

    # Load the features prepared by preprocess.py
    if config.feature_method == 'o':
        x_train, x_test, y_train, y_test = of.load_feature(
            config, config.train_feature_path_opensmile, train=True)

    elif config.feature_method == 'l':
        x_train, x_test, y_train, y_test = lf.load_feature(
            config, config.train_feature_path_librosa, train=True)

    else:
        # Otherwise x_train would be unbound when the model is built.
        raise ValueError(
            "Unsupported feature_method %r; expected 'o' or 'l'" %
            (config.feature_method, ))

    # x_train, x_test (n_samples, n_feats)
    # y_train, y_test (n_samples)

    # Build the model
    model = models.make(config=config, n_feats=x_train.shape[1])

    # Train the model
    print('----- start training', config.model, '-----')
    if config.model in ['lstm', 'cnn1d', 'cnn2d']:
        # One-hot encode the labels for training; evaluation below still
        # receives the original y_test.
        y_train, y_val = np_utils.to_categorical(
            y_train), np_utils.to_categorical(y_test)
        model.train(x_train,
                    y_train,
                    x_test,
                    y_val,
                    batch_size=config.batch_size,
                    n_epochs=config.epochs)
    else:
        model.train(x_train, y_train)
    print('----- end training ', config.model, ' -----')

    # Evaluate the model
    model.evaluate(x_test, y_test)
    # Save the trained model
    model.save(config.checkpoint_path, config.checkpoint_name)
Esempio n. 5
0
def predict(config, audio_path, model):
    """
    Predict the emotion of one audio file and report the result.

    Extracts features with the configured feature method, reshapes them
    with ``reshape_input`` for the configured model, prints the predicted
    label and the class probabilities, and passes the probabilities to
    ``Radar``.

    Args:
        config: Configuration items. Reads ``feature_method``,
            ``predict_feature_path_opensmile`` or
            ``predict_feature_path_librosa``, ``model`` and
            ``class_labels``.
        audio_path: Path of the audio file to classify.
        model: Loaded model exposing ``predict`` and ``predict_proba``.

    Raises:
        ValueError: If ``config.feature_method`` is neither ``'o'`` nor
            ``'l'``.
    """

    # play_audio(audio_path)

    if config.feature_method == 'o':
        # Temporary workaround for an unexplained bug (kept from the
        # original code): run the extraction first, then load the features
        # from the prediction feature path in a separate call.
        of.get_data(config,
                    audio_path,
                    config.predict_feature_path_opensmile,
                    train=False)
        test_feature = of.load_feature(config,
                                       config.predict_feature_path_opensmile,
                                       train=False)
    elif config.feature_method == 'l':
        test_feature = lf.get_data(config,
                                   audio_path,
                                   config.predict_feature_path_librosa,
                                   train=False)
    else:
        # Otherwise test_feature would be unbound in reshape_input below.
        raise ValueError(
            "Unsupported feature_method %r; expected 'o' or 'l'" %
            (config.feature_method, ))

    test_feature = reshape_input(config.model, test_feature)

    result = model.predict(test_feature)
    if config.model in ['lstm', 'cnn1d', 'cnn2d']:
        # For these models the prediction is reduced to the index of its
        # largest entry before it is used as a label index.
        result = np.argmax(result)

    result_prob = model.predict_proba(test_feature)[0]
    print('Recogntion: ', config.class_labels[int(result)])
    print('Probability: ', result_prob)
    Radar(result_prob, config.class_labels)
def train(config):
    """
    Load the preprocessed training features and train the configured model.

    Args:
        config: Configuration items. Reads ``feature_method``,
            ``train_feature_path_opensmile`` or
            ``train_feature_path_librosa``, ``model``, ``batch_size`` and
            ``epochs``.

    Raises:
        ValueError: If ``config.feature_method`` is neither ``'o'`` nor
            ``'l'``.
    """
    # Load the features prepared by preprocess.py
    if config.feature_method == 'o':
        x_train, x_test, y_train, y_test = of.load_feature(
            config, config.train_feature_path_opensmile, train=True)

    elif config.feature_method == 'l':
        x_train, x_test, y_train, y_test = lf.load_feature(
            config, config.train_feature_path_librosa, train=True)

    else:
        # Otherwise x_train would be unbound when training starts.
        raise ValueError(
            "Unsupported feature_method %r; expected 'o' or 'l'" %
            (config.feature_method, ))

    # x_train, x_test (n_samples, n_feats)
    # y_train, y_test (n_samples)

    # Train the model
    print('----- start training', config.model, '-----')
    if config.model in ['lstm', 'cnn1d', 'cnn2d']:
        # One-hot encode the labels
        y_train = np_utils.to_categorical(y_train)
        y_val = np_utils.to_categorical(y_test)
        train_model(config, x_train, y_train, x_test, y_val, config.batch_size,
                    config.epochs)
    else:
        # The original call omitted ``config`` here, which would bind
        # x_train to the first parameter of train_model.
        # NOTE(review): assumes train_model accepts (config, x_train,
        # y_train) with the remaining arguments optional - confirm against
        # its definition.
        train_model(config, x_train, y_train)
    print('----- end training ', config.model, ' -----')
Esempio n. 7
0
def predict(model, model_name: str, file_path: str, feature_method: str = 'o'):
    """
    Play an audio file, predict its emotion and report the result.

    Reads the prediction feature paths and the class labels from the
    module-level ``config``.

    Args:
        model: Loaded model exposing ``predict`` and ``predict_proba``.
        model_name (str): Model type; ``'lstm'`` triggers the 3-D reshape
            and the argmax over the prediction.
        file_path (str): Audio path, appended to the directory that
            contains this source file.
        feature_method (str): ``'o'`` to extract features through ``of``,
            ``'l'`` to extract them through ``lf``.

    Raises:
        ValueError: If ``feature_method`` is neither ``'o'`` nor ``'l'``.
    """
    # Validate before any audio is played: an unknown value would otherwise
    # leave test_feature unbound further down.
    if feature_method not in ('o', 'l'):
        raise ValueError(
            "Unsupported feature_method %r; expected 'o' or 'l'" %
            (feature_method, ))

    file_path = os.path.dirname(os.path.abspath(__file__)) + '/' + file_path
    play_audio(file_path)

    if feature_method == 'o':
        # Temporary workaround for an unexplained bug (kept from the
        # original code): run the extraction first, then load the features
        # from the prediction feature path in a separate call.
        of.get_data(file_path,
                    config.PREDICT_FEATURE_PATH_OPENSMILE,
                    train=False)
        test_feature = of.load_feature(config.PREDICT_FEATURE_PATH_OPENSMILE,
                                       train=False)
    else:
        test_feature = lf.get_data(file_path,
                                   config.PREDICT_FEATURE_PATH_LIBROSA,
                                   train=False)

    if model_name == 'lstm':
        # 2-D array to 3-D (samples, time_steps, input_dim)
        test_feature = np.reshape(
            test_feature, (test_feature.shape[0], 1, test_feature.shape[1]))

    result = model.predict(test_feature)
    if model_name == 'lstm':
        # Reduce the prediction to the index of its largest entry.
        result = np.argmax(result)

    result_prob = model.predict_proba(test_feature)[0]
    print('Recogntion: ', config.CLASS_LABELS[int(result)])
    print('Probability: ', result_prob)
    Radar(result_prob)
Esempio n. 8
0
def train_eval(config):
    """
    Train the configured model and evaluate it after every epoch.

    Loads the preprocessed features, builds the LSTM or 1-D CNN network,
    trains it with Adam and cross-entropy loss on CUDA, saves the final
    state dict to the checkpoint path and optionally plots the curves.

    Args:
        config: configuration items. Reads ``feature_method``,
            ``train_feature_path_opensmile`` or
            ``train_feature_path_librosa``, ``batch_size``, ``model``,
            ``rnn_size`` or ``n_kernels`` / ``hidden_size``,
            ``class_labels``, ``lr``, ``epochs``, ``checkpoint_path``,
            ``checkpoint_name`` and ``vis``.

    Returns:
        tuple: ``(train_loss, test_loss, train_acc, test_acc)``, four lists
        with one entry per epoch. Accuracies are percentages.

    Raises:
        ValueError: If ``config.feature_method`` is neither ``"o"`` nor
            ``"l"``.
    """
    # loading the features preprocessed by preprocess.py
    if config.feature_method == "o":
        x_train, x_test, y_train, y_test = of.load_feature(
            config, config.train_feature_path_opensmile, train=True)

    elif config.feature_method == "l":
        x_train, x_test, y_train, y_test = lf.load_feature(
            config, config.train_feature_path_librosa, train=True)

    else:
        # Otherwise x_train would be unbound on the next statement.
        raise ValueError(
            "Unsupported feature_method %r; expected 'o' or 'l'" %
            (config.feature_method, ))

    n_feats = x_train.shape[1]
    y_train = np.array(y_train)
    y_test = np.array(y_test)
    train_dataset = SpeechDataset(x_train, y_train)
    test_dataset = SpeechDataset(x_test, y_test)
    train_iter = DataLoader(train_dataset,
                            batch_size=config.batch_size,
                            shuffle=True)
    test_iter = DataLoader(test_dataset,
                           batch_size=config.batch_size,
                           shuffle=True)

    if config.model == "lstm":
        model = lstm_ser(n_feats, config.rnn_size, len(config.class_labels),
                         config.batch_size)
    else:
        model = cnn1d_ser(1, config.n_kernels, n_feats, config.hidden_size,
                          len(config.class_labels))

    loss_fn = nn.CrossEntropyLoss()
    model.to("cuda")
    loss_fn.to("cuda")
    optimizer = flow.optim.Adam(model.parameters(), lr=config.lr)

    def train(loader, model, loss_fn, optimizer):
        """Run one training epoch; return (mean batch loss, accuracy in %)."""
        size = len(loader.dataset)
        num_batches = len(loader)
        # test() leaves the model in eval mode, so re-enable training
        # behaviour at the start of every epoch.
        model.train()
        running_loss, correct = 0, 0
        for batch, (x, y) in enumerate(loader):
            # Add a leading axis of size 1: (rows, feats) -> (1, rows, feats).
            x = x.reshape(1, x.shape[0], x.shape[1])
            x = flow.tensor(x, dtype=flow.float32, device="cuda")
            y = flow.tensor(y, dtype=flow.int32, device="cuda")
            # Compute prediction error
            pred = model(x)
            loss = loss_fn(pred, y)
            bool_value = np.argmax(pred.numpy(), 1) == y.numpy()
            correct += float(bool_value.sum())
            # Detach so the running total does not keep every batch's
            # autograd graph alive for the whole epoch.
            running_loss += loss.detach()

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            current = batch * config.batch_size
            if batch % 15 == 0:
                print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

        return running_loss / num_batches, 100 * correct / size

    def test(loader, model, loss_fn):
        """Evaluate on ``loader``; return (mean batch loss, accuracy in %)."""
        size = len(loader.dataset)
        num_batches = len(loader)
        model.eval()
        test_loss, correct = 0, 0
        with flow.no_grad():
            for x, y in loader:
                n_real = x.shape[0]
                if n_real != config.batch_size:
                    # Zero-pad a short batch up to batch_size rows before
                    # the forward pass. Only the first n_real predictions
                    # are scored, so the padding affects neither the loss
                    # nor the accuracy.
                    n_pad = config.batch_size - n_real
                    x = flow.tensor(
                        np.vstack((x.numpy(), np.zeros((n_pad, x.shape[1])))))

                x = x.reshape(1, x.shape[0], x.shape[1])
                x = flow.tensor(x, dtype=flow.float32, device="cuda")
                y = flow.tensor(y, dtype=flow.int32, device="cuda")

                pred = model(x)

                test_loss += loss_fn(pred[:n_real], y)
                bool_value = np.argmax(pred.numpy()[:n_real], 1) == y.numpy()

                correct += float(bool_value.sum())
        test_loss /= num_batches
        print("test_loss", test_loss, "num_batches ", num_batches)
        correct /= size
        print(
            f"Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f}"
        )

        return test_loss, 100 * correct

    train_loss, train_acc, test_loss, test_acc = [], [], [], []
    for e in range(config.epochs):
        print(f"Epoch {e + 1}\n-------------------------------")
        tr_loss, tr_acc = train(train_iter, model, loss_fn, optimizer)
        train_loss.append(tr_loss.numpy())
        train_acc.append(tr_acc)
        te_loss, te_acc = test(test_iter, model, loss_fn)
        test_loss.append(te_loss.numpy())
        test_acc.append(te_acc)
    print("Done!")

    # Saving the trained model; clear a previous checkpoint whether it is a
    # directory or a single file (rmtree alone fails on a regular file).
    model_path = os.path.join(config.checkpoint_path, config.checkpoint_name)
    if os.path.isdir(model_path):
        shutil.rmtree(model_path)
    elif os.path.exists(model_path):
        os.remove(model_path)
    flow.save(model.state_dict(), model_path)

    # Visualize the training process
    if config.vis:
        curve(train_acc, test_acc, "Accuracy", "acc")
        curve(train_loss, test_loss, "Loss", "loss")

    return train_loss, test_loss, train_acc, test_acc