def train_model_dir(quantity_file=5):
    """
    Train one GMM per group of ``quantity_file`` listed training files.

    Every non-blank line of ``c.FILE_NAME_TRAIN`` is used as a path relative
    to ``c.TRAIN_SET``. Each file is loaded with ``read`` and converted to a
    feature matrix with ``ef``; the matrices of consecutive files are stacked
    row-wise. After every ``quantity_file`` files a ``GaussianMixture``
    (6 diagonal-covariance components) is fitted on the stacked features and
    pickled to ``c.TRAINED_MODELS`` as ``<prefix>.gmm``, where ``<prefix>`` is
    the text before the first ``-`` of the last path in the group.

    Features of a trailing group that never reaches ``quantity_file`` files
    are not fitted.

    :param quantity_file: number of consecutive listed files per model
    :return: None
    """
    print("Bắt đầu training")
    features = np.asarray(())
    count = 1
    # Context manager: the list file is closed even if reading/fitting fails.
    with open(c.FILE_NAME_TRAIN, 'r') as file_paths:
        for path in file_paths:
            path = path.strip()
            if not path:
                # A blank line (e.g. a trailing newline) names no file;
                # skip it without counting it towards the group.
                continue
            print(path)

            sr, audio = read(c.TRAIN_SET + path)
            vector = ef(audio, sr)

            if features.size == 0:
                features = vector
            else:
                features = np.vstack((features, vector))

            if count == quantity_file:
                gmm = GaussianMixture(n_components=6,
                                      max_iter=200,
                                      covariance_type='diag',
                                      n_init=10)
                gmm.fit(features)

                picklefile = path.split('-')[0] + '.gmm'
                # Close the model file deterministically so it is fully
                # flushed before the next group starts.
                with open(c.TRAINED_MODELS + picklefile, 'wb') as model_file:
                    pickle.dump(gmm, model_file)
                features = np.asarray(())
                count = 0
            count += 1
def train_model(quantity_file=5, record=True):
    """
    Optionally record training audio, then train one GMM per group of files.

    When ``record`` is truthy, ``record_audio`` is called first with
    ``quantity_file``, ``c.RECORD_SECONDS``, ``c.FILE_NAME_TRAIN`` and
    ``c.TRAIN_SET``. Afterwards every non-blank line of ``c.FILE_NAME_TRAIN``
    is used as a path relative to ``c.TRAIN_SET``; each file is loaded with
    ``read`` and turned into features with ``ef``. After every
    ``c.QUANTITY_TRAIN_FILE`` files a ``GaussianMixture`` is fitted on the
    stacked features and pickled to ``c.TRAINED_MODELS`` as ``<prefix>.gmm``
    (``<prefix>`` = text before the first ``-`` of the group's last path).

    :param quantity_file: forwarded to ``record_audio`` only
    :param record: record new audio before training when truthy
    :return: None
    """
    if record:
        record_audio(quantity_file, c.RECORD_SECONDS, c.FILE_NAME_TRAIN,
                     c.TRAIN_SET)

    print("Bắt đầu training")
    features = np.asarray(())
    count = 1
    # Context manager: the list file is closed even if reading/fitting fails.
    with open(c.FILE_NAME_TRAIN, 'r') as file_paths:
        for path in file_paths:
            path = path.strip()
            if not path:
                # A blank line names no file; skip it without counting it.
                continue
            print(path)

            sr, audio = read(c.TRAIN_SET + path)
            vector = ef(audio, sr)

            if features.size == 0:
                features = vector
            else:
                features = np.vstack((features, vector))

            # NOTE(review): the group size comes from c.QUANTITY_TRAIN_FILE,
            # not from the quantity_file argument - confirm this is intended.
            if count == c.QUANTITY_TRAIN_FILE:
                gmm = GaussianMixture(n_components=6,
                                      max_iter=200,
                                      covariance_type='diag',
                                      n_init=10)
                gmm.fit(features)

                picklefile = path.split('-')[0] + '.gmm'
                with open(c.TRAINED_MODELS + picklefile, 'wb') as model_file:
                    pickle.dump(gmm, model_file)
                features = np.asarray(())
                count = 0
            count += 1
Exemple #3
0
def test_list_file_from_dir():
    """
    Score every audio file found under ``c.TEST_SET`` against all models.

    ``c.TEST_SET`` is expected to contain one sub-directory per group of test
    files. Every ``*.gmm`` file in ``c.TRAINED_MODELS`` is unpickled; the
    model name (file name without ``.gmm``) is used as the speaker label.
    Each test file is loaded with ``read``, converted with ``ef`` and scored
    by every model; the model with the highest score wins. The expected
    speaker is the text before the first ``-`` of the test file name.

    One row per test file is written to ``res/result.csv`` with the columns
    ``file_name``, ``test_speaker``, ``scores`` (score of the winning model),
    ``speaker`` (predicted) and ``result`` (1 if the prediction matches the
    expected speaker, else 0).

    :return: None
    :raises ValueError: from ``np.argmax`` when no model is available
    """
    print("Bắt đầu kiểm tra")

    model_names = [
        file_name for file_name in os.listdir(c.TRAINED_MODELS)
        if file_name.endswith('.gmm')
    ]

    # NOTE: unpickling executes code stored in the file; only load model
    # files that were produced by this project's training step.
    models = []
    for model_name in model_names:
        model_path = os.path.join(c.TRAINED_MODELS, model_name)
        with open(model_path, 'rb') as model_file:
            models.append(pickle.load(model_file))

    # Derive the label from the bare file name so it does not depend on the
    # platform's path separator.
    speakers = [model_name.split(".gmm")[0] for model_name in model_names]

    cols = ['file_name', 'test_speaker', 'scores', 'speaker', 'result']
    rows = []

    for file_path in os.listdir(c.TEST_SET):
        group_dir = os.path.join(c.TEST_SET, file_path)
        paths = os.listdir(group_dir)
        print(file_path)
        for path in paths:
            sr, audio = read(os.path.join(group_dir, path))
            vector = ef(audio, sr)
            log_likelihood = np.array(
                [np.array(gmm.score(vector)).sum() for gmm in models],
                dtype=float)

            winner = np.argmax(log_likelihood)
            speaker = speakers[winner]
            test_speaker = path.split('-')[0]
            result = 1 if speaker == test_speaker else 0

            # Record the winning model's score; previously the score of
            # whichever model happened to be evaluated last was stored.
            rows.append([path, test_speaker, log_likelihood[winner],
                         speaker, result])

    # Build the frame once instead of concatenating a new frame per row.
    df_result = pd.DataFrame(rows, columns=cols)
    # Make sure the output directory exists before writing.
    os.makedirs('res', exist_ok=True)
    df_result.to_csv('res/result.csv')
    print("Sumit successfully!result to save dir res/result.csv")
Exemple #4
0
def test_one_file_dir(file_name):
    """
    Identify the speaker of a single audio file and announce the result.

    Every ``*.gmm`` file in ``c.TRAINED_MODELS`` is unpickled; the model name
    (file name without ``.gmm``) is used as the speaker label. The audio file
    is loaded with ``read``, converted with ``ef`` and scored by every model;
    the label of the highest-scoring model is spoken through ``pyttsx3`` and
    printed.

    :param file_name: path of the audio file to check (surrounding
        whitespace is stripped before reading)
    :return: None
    :raises ValueError: from ``np.argmax`` when no model is available
    """
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    # Use the second installed voice when there is one; otherwise keep the
    # engine's default voice instead of failing with IndexError.
    if len(voices) > 1:
        engine.setProperty("voice", voices[1].id)
    print("bắt đầu kiểm tra")

    model_names = [
        model_name for model_name in os.listdir(c.TRAINED_MODELS)
        if model_name.endswith('.gmm')
    ]

    # NOTE: unpickling executes code stored in the file; only load model
    # files that were produced by this project's training step.
    models = []
    for model_name in model_names:
        model_path = os.path.join(c.TRAINED_MODELS, model_name)
        with open(model_path, 'rb') as model_file:
            models.append(pickle.load(model_file))

    # Derive the label from the bare file name so it does not depend on the
    # platform's path separator.
    speakers = [model_name.split(".gmm")[0] for model_name in model_names]

    file_path = file_name.strip()
    sr, audio = read(file_path)
    vector = ef(audio, sr)

    log_likelihood = np.array(
        [np.array(gmm.score(vector)).sum() for gmm in models], dtype=float)

    winner = np.argmax(log_likelihood)

    speaker = speakers[winner]
    content = f"file được truyền vào là {speaker} nói"
    engine.say(content)
    engine.runAndWait()
    print(f"{file_name} la {speaker} nói")
Exemple #5
0
def test_list_file(quantity_file=2, record=False):
    """
    Optionally record test audio, then score every listed test file.

    When ``record`` is truthy, ``record_audio`` is called first with
    ``quantity_file``, ``c.RECORD_SECONDS``, ``c.FILE_NAME_TEST``,
    ``c.TEST_SET`` and ``True``. Every ``*.gmm`` file in ``c.TRAINED_MODELS``
    is unpickled; the model name (file name without ``.gmm``) is used as the
    speaker label. Every non-blank line of ``c.FILE_NAME_TEST`` is used as a
    path relative to ``c.TEST_SET``; the file is loaded with ``read``,
    converted with ``ef`` and scored by every model, and the highest-scoring
    model wins.

    One row per test file is written to ``res/result.csv`` with the columns
    ``file_name``, ``test_speaker`` (text before the first ``-`` of the
    path), ``scores`` (score of the winning model) and ``speaker``
    (predicted).

    :param quantity_file: forwarded to ``record_audio`` only
    :param record: record new audio before testing when truthy
    :return: None
    :raises ValueError: from ``np.argmax`` when no model is available
    """
    if record:
        record_audio(quantity_file, c.RECORD_SECONDS, c.FILE_NAME_TEST,
                     c.TEST_SET, True)

    print("Bắt đầu kiểm tra")
    model_names = [
        file_name for file_name in os.listdir(c.TRAINED_MODELS)
        if file_name.endswith('.gmm')
    ]

    # NOTE: unpickling executes code stored in the file; only load model
    # files that were produced by this project's training step.
    models = []
    for model_name in model_names:
        model_path = os.path.join(c.TRAINED_MODELS, model_name)
        with open(model_path, 'rb') as model_file:
            models.append(pickle.load(model_file))

    # Derive the label from the bare file name so it does not depend on the
    # platform's path separator.
    speakers = [model_name.split(".gmm")[0] for model_name in model_names]

    cols = ['file_name', 'test_speaker', 'scores', 'speaker']
    rows = []
    # Context manager: the list file is closed even if scoring fails.
    with open(c.FILE_NAME_TEST, 'r') as file_paths:
        for path in file_paths:
            path = path.strip()
            if not path:
                # A blank line names no file; skip it.
                continue
            sr, audio = read(c.TEST_SET + path)
            vector = ef(audio, sr)

            log_likelihood = np.array(
                [np.array(gmm.score(vector)).sum() for gmm in models],
                dtype=float)

            winner = np.argmax(log_likelihood)
            speaker = speakers[winner]
            test_speaker = path.split('-')[0]
            # Record the winning model's score; previously the score of
            # whichever model happened to be evaluated last was stored.
            rows.append([path, test_speaker, log_likelihood[winner], speaker])

    # Build the frame once instead of concatenating a new frame per row.
    df_result = pd.DataFrame(rows, columns=cols)
    # Make sure the output directory exists before writing.
    os.makedirs('res', exist_ok=True)
    df_result.to_csv('res/result.csv')
    print("Sumit successfully!result to save dir res/result.csv")
def train_model_more_dir():
    """
    Train one GMM per sub-directory of ``c.TRAIN_SET``.

    For every entry of ``c.TRAIN_SET`` (each is listed as a directory), all
    contained files are loaded with ``read``, converted with ``ef`` and
    stacked row-wise. A ``GaussianMixture`` (6 diagonal-covariance
    components) is fitted on the stacked features and pickled to
    ``c.TRAINED_MODELS`` as ``<prefix>.gmm``, where ``<prefix>`` is the text
    before the first ``-`` of the last file listed in that directory.
    Directories without files produce no model.

    :return: None
    """
    print("Bắt đầu training")

    for path in os.listdir(c.TRAIN_SET):
        print(path)
        speaker_dir = os.path.join(c.TRAIN_SET, path)
        list_file = os.listdir(speaker_dir)
        if not list_file:
            # Nothing to train on in an empty directory.
            continue

        # Start from an empty feature set for every directory; the model is
        # fitted once all of the directory's files have been stacked.
        features = np.asarray(())
        for path_file in list_file:
            sr, audio = read(os.path.join(speaker_dir, path_file))
            vector = ef(audio, sr)

            if features.size == 0:
                features = vector
            else:
                features = np.vstack((features, vector))

        gmm = GaussianMixture(n_components=6,
                              max_iter=200,
                              covariance_type='diag',
                              n_init=10)
        gmm.fit(features)

        # The model is named after the last listed file, not the directory.
        picklefile = path_file.split('-')[0] + '.gmm'
        model_path = os.path.join(c.TRAINED_MODELS, picklefile)
        # Close the model file deterministically so it is fully flushed.
        with open(model_path, 'wb') as model_file:
            pickle.dump(gmm, model_file)