コード例 #1
0
def task_predict(input_files, input_model):
    """Predict the speaker of every WAV file matching ``input_files``.

    Args:
        input_files: Glob pattern (``~`` is expanded) selecting the files.
        input_model: Path handed to ``ModelInterface.load``.

    Returns:
        ``(label, score)`` of the last file processed, or ``(None, None)``
        when the pattern matches no file.
    """
    m = ModelInterface.load(input_model)
    # Pre-set so the final return is defined even when nothing matches.
    label, score = None, None
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        label, score = m.predict(fs, signal)
        print(f, '->', label, ", score->", score)
    return label, score
コード例 #2
0
def task_verify(wav_url, person_id):
    """Download a voice sample and verify it against ``person_id``.

    Args:
        wav_url: URL of the WAV file to download.
        person_id: Key looked up in the loaded model's ``features``.

    Returns:
        A 3-tuple ``(status, message, matched)``:
        ``('fail', 'current user not trained', '')`` when the person has no
        features in the model, ``('success', '', 'yes'|'no')`` after a
        verification, or ``('fail', 'verify voice file not found', '')``
        when the downloaded file cannot be found.
    """
    start_time = time.time()
    print('开始时间:',
          time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time)))
    m = ModelInterface.load(model)

    if person_id not in m.features:
        return 'fail', 'current user not trained', ''

    # Download the voice file to verify. The timestamp uses %M (minutes);
    # %I would repeat the hour in 12-hour form.
    current_time = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    dest_wav = verify_voice_dir + current_time + '_' + person_id + '.wav'
    utils.download_file(wav_url, dest_wav)

    # Default covers the case where the download produced no file.
    result = 'fail', 'verify voice file not found', ''
    for f in glob.glob(os.path.expanduser(dest_wav)):
        fs, signal = utils.read_wav(f)
        probability = m.verify(fs, signal, person_id)
        print(probability)
        if probability > -48:
            print(f, '-> 匹配成功 :', person_id)
            result = 'success', '', 'yes'
        else:
            print(f, '->未匹配成功')
            result = 'success', '', 'no'
        # Only the first matching file decides the outcome.
        break

    # Report timing before returning so these lines are actually reached.
    end_time = time.time()
    print('结束时间:', time.strftime('%Y-%m-%d %H:%M:%S',
                                 time.localtime(end_time)))
    print('共耗时', end_time - start_time)
    return result
コード例 #3
0
def task_predict(input_files, input_model):
    """Predict the speaker of a single WAV file.

    Args:
        input_files: Path of the WAV file passed to ``read_wav``.
        input_model: Path handed to ``ModelInterface.load``.

    Returns:
        A two-element list ``[label, score]``.
    """
    model = ModelInterface.load(input_model)
    sample_rate, samples = read_wav(input_files)
    predicted, confidence = model.predict(sample_rate, samples)
    print("label", '->', predicted, ", score->", confidence)
    return [predicted, confidence]
コード例 #4
0
def _mark_scan_record(filepath, markup):
    """Set ``sound_markup`` to ``markup`` on the scan record for ``filepath``."""
    # NOTE(review): the credentials are hard-coded and the statement is built
    # with str.format, so a quote in either value breaks (or injects into)
    # the SQL. Switch to a parameterised query once DB.execute is confirmed
    # to accept parameters.
    with DB(host='47.92.33.19',
            user='******',
            passwd='1qazxsw2',
            db='database_fm') as db:
        db.execute(
            "UPDATE fm_t_scan_record SET sound_markup = '{}' WHERE radio_file_path = '{}'"
            .format(markup, filepath))


def task_predict(input_files, input_model):
    """Predict the speaker of the recording and store the label in the DB.

    When ``input_files`` does not exist, the matching record is marked
    ``'Exception'``. Otherwise every file matching the pattern is predicted,
    its label is written to the record and the file is deleted.

    Args:
        input_files: Path (or glob pattern) of the recording(s).
        input_model: Path handed to ``ModelInterface.load``.
    """
    m = ModelInterface.load(input_model)
    # The record key depends only on input_files, so compute it once.
    filepath = "http://sh.illegalfm.com:4881/record/" + os.path.basename(
        input_files)

    if not os.path.exists(input_files):
        _mark_scan_record(filepath, 'Exception')
        return

    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        label, score = m.predict(fs, signal)
        _mark_scan_record(filepath, label)
        print(filepath, '->', label, ", score->", score)
        os.remove(f)
コード例 #5
0
def OrderEnroll():
    """Enroll one hard-coded recording into ``model.out`` and save the result.

    Loads ``model.out``, enrolls the recording under its id, retrains, calls
    ``CheckEnroll`` and dumps the model to ``mo1.out``.
    """
    speaker_id = "18082020202755"
    recording = "./GUI/TotalRecording/18082020202755.wav"

    model = ModelInterface.load("model.out")
    sample_rate, samples = read_wav(recording)
    model.enroll(speaker_id, sample_rate, samples)
    model.train()
    model.CheckEnroll()
    model.dump("mo1.out")


# def task_predictgui(path, input_model):
#     m = ModelInterface.load(input_model)
#     f=glob.glob(path)
#     fs, signal = read_wav(f[0])
#     label, score = m.predict(fs, signal)
#     return label
#
# if __name__ == "__main__":
#     # global args
#     # args = get_args()
#     #
#     # task = args.task
#     #
#     # if task == 'enroll':
#     #      task_enroll(args.input, args.model)
#     #
#     #
#     # elif task == 'predict':
#     #      task_predict(args.input, args.model)
#     #      task_predict("datatest/*.wav", "model1.out")
# task_enroll("./Train/*","model.out")
#
#      # task_predict("./Test", "model.out")
# Predict_ByFile("./GUI/TotalRecording/18082020202755.wav", "D:/doantotnghiep/Speaker_recognition/model.out")
# OrderEnroll()
コード例 #6
0
ファイル: gui.py プロジェクト: TAzOnee/SpeakerRecognition
 def load_model(self, MainWindow):
     """Ask the user for a model file and load it into the global ``m``.

     The chosen path is printed and shown in ``self.ln_model`` before the
     model is loaded.
     """
     global m
     dialog = QtWidgets.QFileDialog()
     selection = dialog.getOpenFileName(MainWindow, "Load Model", "",
                                        "Model File (*.out)")
     # The first element of the dialog result is the selected path.
     model_path = selection[0]
     print(model_path)
     self.ln_model.setText(model_path)
     m = ModelInterface.load(model_path)
コード例 #7
0
def task_check_status(person_id):
    """Report whether ``person_id`` has features in the stored model.

    Returns:
        ``('success', '', 'yes')`` when the id is present in the model's
        ``features``, otherwise ``('success', '', 'no')``.
    """
    is_trained = person_id in ModelInterface.load(model).features
    return 'success', '', ('yes' if is_trained else 'no')
コード例 #8
0
def task_predict(input_files, input_model):
    """Predict every matching WAV file and append the results to a file.

    One line per file is appended to ``Test_results.txt`` in the current
    working directory.

    Args:
        input_files: Glob pattern (``~`` is expanded) selecting the files.
        input_model: Path handed to ``ModelInterface.load``.
    """
    m = ModelInterface.load(input_model)
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        label, score = m.predict(fs, signal)
        # The context manager closes the file even if printing fails.
        with open("Test_results.txt", "a") as g:
            print(f, '->', label, ", score->", score, file=g)
コード例 #9
0
def task_predict(input_files, input_model):
    """Predict the first WAV file matching ``input_files``.

    Args:
        input_files: Glob pattern (``~`` is expanded) selecting the files.
        input_model: Path handed to ``ModelInterface.load``.

    Returns:
        ``[summary, label]`` for the first matching file, where ``summary``
        is a formatted "label/score" string; ``None`` if nothing matches.
    """
    model = ModelInterface.load(input_model)
    for wav_path in glob.glob(os.path.expanduser(input_files)):
        sample_rate, samples = read_wav(wav_path)
        label, score = model.predict(sample_rate, samples)

        summary = "label = {l2}  score = {l3}".format(l2=label, l3=score)
        # Returns inside the loop: only the first match is ever processed.
        return [summary, label]
コード例 #10
0
ファイル: gui.py プロジェクト: danenigma/speaker_recoginition
 def load(self):
     """Ask for a data file and load it as ``self.backend``.

     Load errors are reported through ``self.warn``; a successful load is
     reported through ``self.status``.
     """
     fname = QFileDialog.getOpenFileName(self, "Open Data File:", "", "")
     if not fname:
         return
     try:
         self.backend = ModelInterface.load(fname)
     except Exception as e:
         self.warn(str(e))
         return
     self.status("Loaded from file: " + fname)
コード例 #11
0
 def load(self):
     """Ask for a data file and load it as ``self.backend``.

     Load errors are reported through ``self.warn``; a successful load is
     reported through ``self.status``.
     """
     fname = QFileDialog.getOpenFileName(self, "Open Data File:", "", "")
     if not fname:
         return
     try:
         self.backend = ModelInterface.load(fname)
     except Exception as e:
         self.warn(str(e))
         return
     self.status("Loaded from file: " + fname)
コード例 #12
0
	def __init__(self):
		"""Load the model, feed it the recording at ``self.BG`` and set up ROS I/O.

		The model at ``self.INPUT_MODEL`` is loaded, ``init_noise`` is called
		with the recording read from ``self.BG`` (best effort), and a
		publisher on ``/speaker`` plus a subscriber on ``/wav`` are created.
		"""
		self.backend = ModelInterface.load(self.INPUT_MODEL)
		try:
			fs, signal = read_wav(self.BG)
			self.backend.init_noise(fs, signal)
		except Exception:
			# Best effort: carry on without it, but let KeyboardInterrupt
			# and SystemExit propagate instead of swallowing them.
			print("file not found!")
		self.pub = rospy.Publisher('/speaker', String, queue_size=10)
		self.sub = rospy.Subscriber('/wav', numpy_msg(Floats), self.task_predict)
コード例 #13
0
    def __init__(self):
        """Load the model, feed it the recording at ``self.BG`` and set up ROS I/O.

        The model at ``self.INPUT_MODEL`` is loaded, ``init_noise`` is called
        with the recording read from ``self.BG`` (best effort), and a
        publisher on ``/speaker`` plus a subscriber on ``/wav`` are created.
        """
        self.backend = ModelInterface.load(self.INPUT_MODEL)
        try:
            fs, signal = read_wav(self.BG)
            self.backend.init_noise(fs, signal)
        except Exception:
            # Best effort: carry on without it, but let KeyboardInterrupt
            # and SystemExit propagate instead of swallowing them.
            print("file not found!")
        self.pub = rospy.Publisher('/speaker', String, queue_size=10)
        self.sub = rospy.Subscriber('/wav', numpy_msg(Floats),
                                    self.task_predict)
コード例 #14
0
def Predict_ByFile(file, input_model):
    """Predict the speaker of one WAV file, printing intermediate values.

    Args:
        file: Path of the WAV file passed to ``read_wav``.
        input_model: Path handed to ``ModelInterface.load``.

    Returns:
        The predicted label.
    """
    print("start")
    m = ModelInterface.load(input_model)
    fs, signal = read_wav(file)
    print(fs)
    print(signal)
    label, score = m.predict(fs, signal)
    print(label)
    print(score)
    return label
def feature_re_extract():
    """Re-encode the stored features with an autoencoder and retrain.

    Loads ``model/model.out``, trains a ``dA`` autoencoder (39 visible and
    100 hidden units) on the features of every class in ``test_class``,
    replaces each class's features with the autoencoder's hidden values,
    retrains the model and writes ``model/model_da.out`` together with the
    pickled autoencoder ``model/da.out``.
    """
    #pdb.set_trace()
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S',
                  'FJAZ_S', 'FMEL_S', 'FMEV_S', 'FSLJ_S', 'FTEJ_S',
                  'FUAN_S', 'MASM_S', 'MCBR_S', 'MFKC_S', 'MKBP_S',
                  'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S', 'MOEW_S',
                  'MPRA_S', 'MREM_S', 'MTLS_S']
    m = ModelInterface.load('model/model.out')

    # construct train set
    train_set = []
    for c in test_class:
        train_set.extend(m.features[c])

    # construct autoencoder
    train_data = T.dmatrix('train_data')
    x = T.dmatrix('x')
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 10))
    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=39,
        n_hidden=100
    )
    cost, updates = da.get_cost_updates(
        corruption_level=0.,
        learning_rate=0.4
    )
    train_da = theano.function(
        [train_data],
        cost,
        updates=updates,
        givens={
            x: train_data
        }
    )

    # train autoencoder; the training matrix is the same for every epoch,
    # so it is converted once
    training_epochs = 100
    train_matrix = numpy.asarray(train_set)
    c1 = []
    for epoch in range(training_epochs):
        c1.append(train_da(train_matrix))
        print('Training epoch %d, cost  %s' % (epoch, c1[-1]))

    for c in test_class:
        m.features[c] = da.get_hidden_values(m.features[c]).eval()

    m.train()
    m.dump('model/model_da.out')
    # Protocol -1 is a binary pickle format, so the file must be opened in
    # binary mode.
    with open('model/da.out', 'wb') as f:
        pickle.dump(da, f, -1)
コード例 #16
0
def evaluate(eval_data_dir, model_path):
    """Log the prediction accuracy over the WAV files in a directory.

    The expected label of a file is the part of its name before the first
    underscore.

    Args:
        eval_data_dir: Directory whose entries containing ``.wav`` are used.
        model_path: Path handed to ``ModelInterface.load``.
    """
    m = ModelInterface.load(model_path)
    files = [f for f in os.listdir(eval_data_dir) if re.search(r"\.wav", f)]
    if not files:
        # Nothing to evaluate; avoids dividing by zero below.
        logger.warning("No .wav files found in {}".format(eval_data_dir))
        return

    n_correct = 0
    for f in files:
        # Split once so names with several underscores still yield a label.
        label = f.split("_", 1)[0]
        file = os.path.join(eval_data_dir, f)
        fs, signal = read_wav(file)
        pred, _ = m.predict(fs, signal)
        logger.info("Input: {}, Output: {}".format(file, pred))
        if label == pred:
            n_correct += 1
    logger.info("Accuracy: {}".format(n_correct / len(files)))
コード例 #17
0
def task_predict(input_files, input_model):
    """Predict each audio file against several models and print the accuracy.

    Args:
        input_files: Glob pattern (``~`` is expanded) selecting audio files.
        input_model: Whitespace-separated glob patterns of model files.
    """
    # Split the whitespace-separated model patterns into a list of paths
    input_models = [os.path.expanduser(k) for k in input_model.strip().split()]
    # Expand every pattern and load each matched model file
    model_paths = itertools.chain.from_iterable(
        glob.glob(pattern) for pattern in input_models)
    models = [ModelInterface.load(path) for path in model_paths]
    # Counters for the accuracy statistics
    right = 0
    wrong = 0
    num = 0
    # Extract features from each audio file and match them against every model
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        print(f)
        feat = get_feature(fs, signal)
        # Collect the results of every model into one list
        predict_result = []
        for model in models:
            predict_result.extend(model.predict(feat))
        # Sort the results by score, highest first
        predict_result = sorted(predict_result,
                                key=operator.itemgetter(1),
                                reverse=True)
        # Label format of the WeChat voice dataset: text before the first '_'
        label = os.path.basename(f).split('_')[0]
        # Label format of the AISHELL dataset:
        #label=os.path.basename(f)[6:11]
        predict = predict_result[0][0]
        # Tally accuracy; `in` is a containment test on the top prediction
        if label in predict:
            right += 1
            print('label:', label, '  predict:', predict, '  right')
        else:
            wrong += 1
            print('label:', label, '  predict:', predict, '  wrong')
        num += 1
    # Guard against a pattern that matched no file at all
    accuracy = right / num if num else 0.0
    print('All:', num, '  right:', right, '  wrong:', wrong, '  acc:',
          accuracy)
コード例 #18
0
def task_predict(input_files, input_model):
    """Predict every matching WAV file and print timing information.

    A prediction is reported only when its probability is above -48;
    otherwise the file is reported as not recognised.

    Args:
        input_files: Glob pattern (``~`` is expanded) selecting the files.
        input_model: Path handed to ``ModelInterface.load``.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'
    started = time.time()
    print('开始时间:', time.strftime(stamp_format, time.localtime(started)))

    model = ModelInterface.load(input_model)
    for wav_path in glob.glob(os.path.expanduser(input_files)):
        sample_rate, samples = utils.read_wav(wav_path)
        label, probability = model.predict(sample_rate, samples)
        if probability > -48:
            print(wav_path, '->', label)
            continue
        print(wav_path, '->未识别到说话人')

    finished = time.time()
    print('结束时间:', time.strftime(stamp_format, time.localtime(finished)))
    print('共耗时', finished - started)
コード例 #19
0
def task_predict():
    """Predict every ``.wav`` file in ``./predictSounds`` with ``data.bin``.

    For each file the name without extension, the predicted label and the
    score are printed. A notice is printed when the directory is empty.
    """
    model = ModelInterface.load('data.bin')
    sound_dir = os.path.join(os.getcwd(), 'predictSounds')
    entries = os.listdir(sound_dir)
    if not entries:
        print('No wav files found')

    # Keep only entries whose extension is .wav (case-insensitive)
    wav_names = [
        name for name in entries
        if os.path.splitext(name)[-1].lower() == '.wav'
    ]
    for name in wav_names:
        sample_rate, signal = read_wav(os.path.join(sound_dir, name))
        expected = os.path.splitext(name)[0]
        predicted, score = model.predict(sample_rate, signal)
        print(expected, '->', predicted, '->', score)
コード例 #20
0
def task_predict(input_files, input_model):
    """Predict every matching WAV file and print the overall accuracy.

    The expected label of a file is the name of its parent directory.

    Args:
        input_files: Glob pattern (``~`` is expanded) selecting the files.
        input_model: Path handed to ``ModelInterface.load``.
    """
    total = 0
    correct = 0
    m = ModelInterface.load(input_model)
    for f in glob.glob(os.path.expanduser(input_files)):
        total += 1
        fs, signal = read_wav(f)
        label = m.predict(fs, signal)
        print(f, '->', label, end='')
        # Parent directory name, independent of the path separator and safe
        # for paths without a directory part.
        expected = os.path.basename(os.path.dirname(f))
        if expected == label:
            print("√")
            correct += 1
        else:
            print('×')

    # Guard against a pattern that matched no file at all
    acc = correct * 1.0 / total if total else 0.0
    print(acc)
def test(up_bound, lower_bound):
    """Print the accuracy of the autoencoder-based model on the test files.

    Loads ``model/model_da.out`` and the pickled autoencoder
    ``model/da.out``, then calls ``predict`` on the first 80000 samples of
    ``data/test/<class>1.wav`` and ``<class>2.wav`` for every class.

    Args:
        up_bound: Passed through to ``predict``.
        lower_bound: Passed through to ``predict``.
    """
    m = ModelInterface.load('model/model_da.out')
    # Pickles are binary data, so read in binary mode.
    # NOTE(review): pickle.load runs arbitrary code from the file; only load
    # files this project wrote itself.
    with open('model/da.out', 'rb') as f:
        da = pickle.load(f)
    count = 0
    test_dir = 'data/test/'
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S',
                  'FJAZ_S', 'FMEL_S', 'FMEV_S', 'FSLJ_S', 'FTEJ_S',
                  'FUAN_S', 'MASM_S', 'MCBR_S', 'MFKC_S', 'MKBP_S',
                  'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S', 'MOEW_S',
                  'MPRA_S', 'MREM_S', 'MTLS_S']
    file_name = ['1.wav', '2.wav']
    for c in test_class:
        for n in file_name:
            fs, signal = wavfile.read(test_dir + c + n)
            if predict(m, fs, signal[:80000], da, up_bound, lower_bound) == c:
                count = count + 1
    accuracy = (100.0 * count) / (len(test_class) * len(file_name))
    print('accuracy is: %s %%' % (accuracy,))
def feature_re_extract():
    """Rescale the stored features, train an ``mSdA`` on them and retrain.

    Loads ``model/model.out``, gathers the feature rows of every class in
    ``test_class``, rescales each feature column with its min/max, trains an
    ``mSdA`` with layers ``[39, 100]``, replaces each class's features with
    the autoencoder's hidden values, retrains the model and writes
    ``model/model_sda.out`` and ``model/sda.out``.
    """
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S', 
    		'FJAZ_S', 'FMEL_S', 'FMEV_S', 'FSLJ_S', 'FTEJ_S', 
    		'FUAN_S', 'MASM_S', 'MCBR_S', 'MFKC_S', 'MKBP_S', 
    		'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S', 'MOEW_S',
    		'MPRA_S', 'MREM_S', 'MTLS_S']
    m = ModelInterface.load('model/model.out')
    
    # construct train set
    train_set = []
    # NOTE(review): these two lists are re-initialised below before first use.
    up_bound = []
    lower_bound = []
    for c in test_class:
    		for i in m.features[c]:
    				train_set.append(i)
    
    # put all values into -1~1
    up_bound = []
    lower_bound = []
    # Seed the per-column bounds with the first row.
    for j in xrange(len(train_set[0])):
    		up_bound.append(train_set[0][j])
    		lower_bound.append(train_set[0][j])
    # Widen the bounds to the per-column maximum and minimum over all rows.
    for i in xrange(len(train_set)):
    		for j in xrange(len(train_set[0])):
    				up_bound[j] = max(up_bound[j], train_set[i][j])
    				lower_bound[j] = min(lower_bound[j], train_set[i][j])
    # Rescale every value in place with 2*(v-min)/(max-min)-1.
    # NOTE(review): a column whose max equals its min divides by zero here.
    # NOTE(review): if the rows are views into m.features (e.g. NumPy rows),
    # this in-place write also rescales m.features -- TODO confirm.
    for i in xrange(len(train_set)):
    		for j in xrange(len(train_set[0])):
    				train_set[i][j] = 2*((train_set[i][j]-lower_bound[j]) / (up_bound[j]-lower_bound[j]))-1
    
    # construct stacked autoencoder
    sda = mSdA(
    		layers = [39, 100]
    )
    sda.setMinMax(up_bound, lower_bound)
    sda.train(train_set, 500) # use 500 as the batch size
    # Replace each class's features with the autoencoder's hidden values.
    for c in test_class:
    		m.features[c] = sda.get_hidden_values(m.features[c])
    m.train()
    m.dump('model/model_sda.out')
    sda.dump('model/sda.out')
コード例 #23
0
def task_predict(path, input_model):
    """Predict every WAV file under ``path`` and print the accuracy.

    The expected label of a file is the name of its parent directory.

    Args:
        path: Root directory that is walked recursively.
        input_model: Path handed to ``ModelInterface.load``.
    """
    m = ModelInterface.load(input_model)
    files = []
    # Collect every file whose name contains '.wav'
    for root, _dirs, names in os.walk(path):
        for name in names:
            if '.wav' in name:
                files.append(os.path.join(root, name))

    total, correct = 0, 0
    for f in files:
        total += 1
        fs, signal = read_wav(f)
        label, score = m.predict(fs, signal)
        y_true = os.path.basename(os.path.dirname(os.path.realpath(f)))
        if label == y_true:
            correct += 1
        print(f, '->', label, ", score->", score)
    print('So file du doan dung: ', correct)
    print('Tong so file: ', total)
    # Guard against a directory tree without any WAV file
    accuracy = correct / total * 100 if total else 0.0
    print('accuracy: ', accuracy, '%')
def test():
    """Print the segment-level accuracy of the stacked-autoencoder model.

    Loads ``model/model_sda.out`` and ``model/sda.out``, cuts
    ``data/test/<class>1.wav`` and ``<class>2.wav`` of every class into
    consecutive 40000-sample segments and calls ``predict`` on each one.
    """
    m = ModelInterface.load('model/model_sda.out')
    sda = mSdA.load('model/sda.out')
    count = 0
    allsum = 0
    test_dir = 'data/test/'
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S',
                  'FJAZ_S', 'FMEL_S', 'FMEV_S', 'FSLJ_S', 'FTEJ_S',
                  'FUAN_S', 'MASM_S', 'MCBR_S', 'MFKC_S', 'MKBP_S',
                  'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S', 'MOEW_S',
                  'MPRA_S', 'MREM_S', 'MTLS_S']
    file_name = ['1.wav', '2.wav']
    signal_size = 40000
    for c in test_class:
        for n in file_name:
            fs, signal = wavfile.read(test_dir + c + n)
            # Floor division: only whole segments are evaluated, and the
            # count stays an integer on every Python version.
            for indx in range(len(signal) // signal_size):
                allsum = allsum + 1
                segment = signal[indx * signal_size:(indx + 1) * signal_size]
                if predict(m, fs, segment, sda) == c:
                    count = count + 1
    # Guard against recordings that are all shorter than one segment
    accuracy = (100.0 * count) / allsum if allsum else 0.0
    print('accuracy is: %s %%' % (accuracy,))
コード例 #25
0
def task_train_single(wav_url, person_id):
    """Download a voice sample and train the model for one new person.

    Args:
        wav_url: URL of the WAV file to download.
        person_id: Identifier the recordings are enrolled under.

    Returns:
        ``('success', '')`` on success, or ``('fail', reason)`` when the
        person already exists in the model or no WAV file is available.
    """
    if os.path.exists(model):
        m = ModelInterface.load(model)
    else:
        m = ModelInterface()

    if person_id in m.features:
        return 'fail', 'aleady exist'

    # Download the training voice file
    dest_dir = train_voice_dir + person_id
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    # The timestamp uses %M (minutes); %I would repeat the hour in 12-hour
    # form.
    current_time = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    dest_wav = dest_dir + '/' + current_time + '_' + person_id + '.wav'

    print(wav_url)
    print(dest_wav)
    utils.download_file(wav_url, dest_wav)

    # Collect the downloaded training voice files
    wavs = glob.glob(dest_dir + '/*.wav')

    if len(wavs) == 0:
        return 'fail', 'no wav files under this dir'

    # Enroll every recording; a file that fails is reported and skipped
    for wav in wavs:
        try:
            fs, signal = utils.read_wav(wav)
            m.enroll(person_id, fs, signal)
            print("wav %s has been enrolled" % (wav))
        except Exception as e:
            print(wav + " error %s" % (e))

    m.train_single(person_id)
    m.dump(model)

    return 'success', ''
コード例 #26
0
def task_realtime_predict(input_model):
    """Continuously record from the microphone and print the predicted speaker.

    A sliding window of ``INITLEN`` seconds of audio is kept; every
    ``INTERVAL`` seconds the oldest chunks are replaced by new ones and the
    whole window is passed to the model. The loop runs until an exception
    (for example ``KeyboardInterrupt``) stops it.

    Args:
        input_model: Path handed to ``ModelInterface.load``.
    """
    print('start')
    m = ModelInterface.load(input_model)

    # set recording parameter
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    INTERVAL = 1
    INITLEN = 2

    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print("* recording")

    def read_chunk():
        # frombuffer replaces the deprecated binary mode of np.fromstring
        return np.frombuffer(stream.read(CHUNK), dtype=np.int16).tolist()

    try:
        # fulfill the frame
        frames = [read_chunk() for _ in range(int(RATE / CHUNK * INITLEN))]

        while True:
            for _ in range(int(RATE / CHUNK * INTERVAL)):
                # append the newest window ...
                frames.append(read_chunk())
                # ... and drop the oldest one
                del frames[0]

            framesjoin = np.array(utils.flat_array(frames))
            label, probability = m.predict(RATE, framesjoin)
            print('当前说话人->', label)
    finally:
        # Release the audio device however the loop ends
        stream.stop_stream()
        stream.close()
        p.terminate()
コード例 #27
0
                        required=True,
                        help="Path to model file")
    parser.add_argument("--task",
                        required=True,
                        choices=["enroll", "predict"],
                        help='Task to do. Either "enroll" or "predict"')
    args = parser.parse_args()

    if args.task == "predict" and not os.path.isfile(args.model_path):
        raise ValueError("Please provide valid model path")

    r = sr.Recognizer()
    r.pause_threshold = 1.0
    if args.task == "enroll":
        if os.path.isfile(args.model_path) and not args.overwrite_model:
            model = ModelInterface.load(args.model_path)
        else:
            model = ModelInterface()
        print("***** Enroll sound data for one speaker *****")
        name = input("Enter your name: ")
        name = name.strip()
        print(
            f"Hello {name}. Please input your voice {args.num_samples} times")
        with tempfile.TemporaryDirectory() as tempdir:
            i = 1
            while i <= args.num_samples:
                with sr.Microphone() as source:
                    audio = r.listen(source)
                # Generate random filename
                filename = os.path.join(
                    tempdir, name + "_" + str(uuid.uuid1()) + ".wav")
コード例 #28
0
def task_predict(input_files, input_model, isDynamic):
    """
    Predict the speaker from the given file(s)

    Args:
        input_files (string): full path to the speaker file
        input_model (string): model trained to give the solution
        isDynamic (bool): use the model's per-label thresholds when true,
            otherwise the static threshold ``1 / n_label``
    """
    # Loads the model object and retrieve the number of speaker #

    m = ModelInterface.load(input_model)
    n_label = m.get_n_label()

    # Computes the threshold (dynamic or static) #

    if isDynamic:
        dyn_thrsh = m.get_dyn_threshold()
    else:
        threshold = 1 / n_label

    # Creates an Evaluation object to save the results #

    ev = Evaluation()

    # Starts the prediction process #

    print(input_files)
    root = None
    for f in glob.glob(os.path.expanduser(input_files)):
        try:
            # NOTE(review): reset per file, so the duration saved at the end
            # covers only the last file -- confirm this is intended.
            start_time = time.time()
            fs, signal = read_wav(f)
            signal = signal / max(abs(signal))

            # Extracts the features and predicts the label using the higher score within all possible speaker #

            label, score = m.predict(fs, VAD_process(signal))

        except Exception as e:
            print(f + ' error %s' % (e))
            # Without a prediction there is nothing to evaluate for this
            # file; skip it instead of reusing stale or undefined results.
            continue

        # Retrieves the expected label from the directory (evaluation not real time only) #

        root = os.path.split(f)
        if input_files[-9:] == "*/*/*.wav":
            root = os.path.split(root[0])
        speaker = os.path.basename(root[0])

        # Recognition process : If the given score is higher than the threshold, the label is correct #
        # Else the speaker is not recognize #

        if isDynamic:
            threshold = dyn_thrsh[label]
        recog = (score > threshold)

        if not recog:
            print(speaker, ' not recognize. ->', label, 'Score->', score)
        else:
            print(speaker, '->', label, ', score->', score)

        # Adds the speaker and its results to the evaluation object #

        ev.new(speaker, label, recog)

    if root is None:
        # No file was predicted, so there is nothing to report or save.
        print('No file could be evaluated for', input_files)
        return

    # Retrieves the Database label used and prints the accuracy #

    path = os.path.split(root[0])[0]
    DB_name = os.path.split(path)[0]
    DB_name = os.path.basename(os.path.split(DB_name)[1])
    print('Accuracy : ', ev.accuracy(), '\n')
    ev.save(os.path.basename(path), n_label, DB_name,
            (time.time() - start_time))
コード例 #29
0
def task_predict(input_files, input_model):
    """Print the predicted speaker and score of every matching WAV file.

    Args:
        input_files: Glob pattern (``~`` is expanded) selecting the files.
        input_model: Path handed to ``ModelInterface.load``.
    """
    model = ModelInterface.load(input_model)
    matches = glob.glob(os.path.expanduser(input_files))
    for wav_path in matches:
        sample_rate, samples = read_wav(wav_path)
        speaker, gmm_score = model.predict(sample_rate, samples)
        print('You are', speaker, "          GMM score of ", gmm_score)
コード例 #30
0
    )
    for i in range(0, 3, 1):
        if not (os.path.exists(input_model)):
            input_model = input(
                "Model does not exists ! " + str(3 - i) +
                " attempts left. Enter relative path of the model (ex.: ./model/Pers_DB.out) : "
            )
        else:
            break

    if os.path.exists(input_model):

        # Loading the model and starting the while loop #

        print("Model found ! Starting real-time recognition process")
        m = ModelInterface.load(input_model)
        speaker = input(
            "Write the name of the speaker (for evaluation purposes) :")

        start_time = time.time()
        while tmp < 5:
            count += 1

            buffer.record(chunk_size=sampling_rate)  # 1 second of record
            data = buffer.get_data()
            data = np.frombuffer(data, 'int16')

            # Predicting every 3 loop #
            # Recording at 16000 Hz as sampling rate, (1 * 3) sec as buffer size and converting data in int16 type #

            if count >= 3:
コード例 #31
0
	print(" + Predicting")
	input_model = "model.out"
	input_files = "./tmp.wav"
	# m = ModelInterface.load(input_model)
	m = model
	for f in glob.glob(os.path.expanduser(input_files)):
		fs, signal = read_wav(f)
		label = m.predict(fs, signal)
#		 print max(label, key=operator.itemgetter(1))
		if abs(label[0][1] - label[1][1]) > .0000002:
			speakers_detected = [x for x in label if x[1] >= -.1]
			if len(speakers_detected) > 0:
				print max(speakers_detected, key=operator.itemgetter(1))
			else:
				print "___			  not similar enough to know speaker","	   Best guess:", max(label, key=operator.itemgetter(1))
		else:
			print "...						  predictions too similar","	   Best guess:", max(label, key=operator.itemgetter(1))

# Script entry: load the model once, show it, then run the prediction
# routine repeatedly.
mods = ModelInterface()

# load is called on the instance; its result replaces the empty model above.
mods = mods.load("./model.out")
see_model(mods)

# Imported here, before the first find_user call below.
# NOTE(review): presumably find_user uses operator -- its definition is not
# visible here; move this import to the top of the file once confirmed.
import operator

# find_user()

# Push earlier output out of view with blank lines.
print "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"

# range(1, 60) yields 59 iterations.
for i in range(1,60):
	find_user(mods)
def feature_re_extract():
    """Re-encode the stored features with two stacked autoencoders and retrain.

    Loads ``model/model.out``, trains a first ``dA`` (39 -> 30 units) on the
    features of every class in ``test_class`` and a second ``dA``
    (30 -> 20 units) on the first one's hidden values, replaces each class's
    features with the second autoencoder's hidden values, retrains the model
    and writes ``model/model_sda.out``, ``model/da1.out`` and
    ``model/da2.out``.

    Returns:
        ``(up_bound, lower_bound)``; both are empty lists because the
        min/max normalisation step is disabled.
    """
    #pdb.set_trace()
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S',
                  'FJAZ_S', 'FMEL_S', 'FMEV_S', 'FSLJ_S', 'FTEJ_S',
                  'FUAN_S', 'MASM_S', 'MCBR_S', 'MFKC_S', 'MKBP_S',
                  'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S', 'MOEW_S',
                  'MPRA_S', 'MREM_S', 'MTLS_S']
    m = ModelInterface.load('model/model.out')

    # construct train set
    train_set = []
    up_bound = []
    lower_bound = []
    for c in test_class:
        train_set.extend(m.features[c])

    # Disabled: put all values into -1~1
    # up_bound = []
    # lower_bound = []
    # for j in xrange(len(train_set[0])):
    #     up_bound.append(train_set[0][j])
    #     lower_bound.append(train_set[0][j])
    #
    # for i in xrange(len(train_set)):
    #     for j in xrange(len(train_set[0])):
    #         up_bound[j] = max(up_bound[j], train_set[i][j])
    #         lower_bound[j] = min(lower_bound[j], train_set[i][j])
    #
    # for i in xrange(len(train_set)):
    #     for j in xrange(len(train_set[0])):
    #         train_set[i][j] = 2*((train_set[i][j]-lower_bound[j]) / (up_bound[j]-lower_bound[j]))-1

    # construct autoencoder
    train_data = T.dmatrix('train_data')
    x = T.dmatrix('x')
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 10))
    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=39,
        n_hidden=30
    )
    cost, updates = da.get_cost_updates(
        corruption_level=0.,
        learning_rate=0.4
    )
    train_da = theano.function(
        [train_data],
        cost,
        updates=updates,
        givens={
            x: train_data
        }
    )

    # train first autoencoder; the training matrix is the same for every
    # epoch, so it is converted once
    training_epochs = 20
    train_matrix = numpy.asarray(train_set)
    c1 = []
    for epoch in range(training_epochs):
        c1.append(train_da(train_matrix))
        print('Training 1st ae epoch %d, cost  %s' % (epoch, c1[-1]))

    # train second autoencoder on the hidden values of the first one
    train_set2 = da.get_hidden_values(train_set).eval()

    train_data = T.dmatrix('train_data')
    x = T.dmatrix('x')
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 10))
    da2 = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=30,
        n_hidden=20
    )
    cost, updates = da2.get_cost_updates(
        corruption_level=0.,
        learning_rate=0.4
    )
    train_da2 = theano.function(
        [train_data],
        cost,
        updates=updates,
        givens={
            x: train_data
        }
    )
    training_epochs = 20
    train_matrix2 = numpy.asarray(train_set2)
    c1 = []
    for epoch in range(training_epochs):
        c1.append(train_da2(train_matrix2))
        print('Training 2nd ae epoch %d, cost  %s' % (epoch, c1[-1]))

    # Replace each class's features with the output of both encoders
    for c in test_class:
        m.features[c] = da2.get_hidden_values(da.get_hidden_values(m.features[c]).eval()).eval()

    m.train()
    m.dump('model/model_sda.out')
    # Protocol -1 is a binary pickle format, so the files must be opened in
    # binary mode.
    with open('model/da1.out', 'wb') as f:
        pickle.dump(da, f, -1)
    with open('model/da2.out', 'wb') as f:
        pickle.dump(da2, f, -1)
    return up_bound, lower_bound
コード例 #33
0
def record_predict(input_file, input_model):
    """Return the model's prediction for a single WAV file.

    Args:
        input_file: Path of the WAV file passed to ``read_wav``.
        input_model: Path handed to ``ModelInterface.load``.

    Returns:
        Whatever ``predict`` returns for the file.
    """
    model = ModelInterface.load(input_model)
    sample_rate, samples = read_wav(input_file)
    return model.predict(sample_rate, samples)
コード例 #34
0
def task_predict(input_files, input_model):
    """Predict each audio file against several models; print top-1/top-10 accuracy.

    Exits with status 1 when no model file is found.

    Args:
        input_files: Glob pattern (``~`` is expanded) selecting audio files.
        input_model: Whitespace-separated glob patterns of model files.
    """
    # Split the whitespace-separated model patterns into a list of paths
    input_models = [os.path.expanduser(k) for k in input_model.strip().split()]
    # Expand every pattern and load each matched model file
    model_paths = itertools.chain.from_iterable(
        glob.glob(pattern) for pattern in input_models)
    models = [ModelInterface.load(path) for path in model_paths]
    if not models:
        print("No model file found in %s" % input_model)
        sys.exit(1)
    # Counters for the accuracy statistics (right/wrong: top-10,
    # right1/wrong1: top-1)
    right = 0
    right1 = 0
    wrong = 0
    wrong1 = 0
    num = 0
    # Extract features from each audio file and match them against every model
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        print(f)
        feat = get_feature(fs, signal)
        # Collect the results of every model into one list
        predict_result = []
        for model in models:
            results = model.predict_proba(feat)
            print(results)
            predict_result.extend(results)
        # Sort the results by score, highest first
        predict_result = sorted(predict_result,
                                key=operator.itemgetter(1),
                                reverse=True)
        # Label format of the WeChat voice dataset: text before the first '_'
        label = os.path.basename(f).split('_')[0]
        # Label format of the AISHELL dataset:
        # label=os.path.basename(f)[6:11]
        predict = predict_result[0][0]
        predict_score = predict_result[0][1]
        # Tally top-1 accuracy
        if label in predict:
            right1 += 1
            print('label:', label, '  predict:', predict, '  score:',
                  predict_score, ' top1 right')
        else:
            wrong1 += 1
            print('label:', label, '  predict:', predict, '  score:',
                  predict_score, ' top1 wrong')
        # Tally top-10 accuracy
        top10 = predict_result[:10]
        predicts = [pre[0] for pre in top10]
        predict_scores = [pre[1] for pre in top10]
        if label in predicts:
            right += 1
            print('label:', label, '  predicts:', predicts, '  scores:',
                  predict_scores, ' top10 Right')
        else:
            wrong += 1
            print('label:', label, '  predicts:', predicts, '  scores:',
                  predict_scores, ' top10 Wrong')
        num += 1
    # Guard against a pattern that matched no audio file at all
    top1_acc = right1 / num if num else 0.0
    top10_acc = right / num if num else 0.0
    print('top1:', num, '  right:', right1, '  wrong:', wrong1, ' top1 acc:',
          top1_acc)
    print('top10:', num, '  right:', right, '  wrong:', wrong, ' top10 acc:',
          top10_acc)
def task_predict(input_files, input_model):
    """Score each audio file against all models; print top-1/top-10 accuracy.

    Every model is scored through ``get_score`` on a two-worker thread pool,
    and the scores are turned into probabilities with ``GMMSet.softmax``.
    Exits with status 1 when no model file is found.

    Args:
        input_files: Glob pattern (``~`` is expanded) selecting audio files.
        input_model: Whitespace-separated glob patterns of model files.
    """
    # Split the whitespace-separated model patterns into a list of paths
    input_models = [os.path.expanduser(k) for k in input_model.strip().split()]
    # Expand every pattern and load each matched model file
    model_paths = itertools.chain.from_iterable(
        glob.glob(pattern) for pattern in input_models)
    models = [ModelInterface.load(path) for path in model_paths]
    if not models:
        print("No model file found in %s" % input_model)
        sys.exit(1)
    # Counters for the accuracy statistics (right/wrong: top-10,
    # right1/wrong1: top-1)
    right = 0
    right1 = 0
    wrong = 0
    wrong1 = 0
    num = 0
    # Extract features from each audio file and match them against every model
    for f in glob.glob(os.path.expanduser(input_files)):
        start_time = time.time()
        fs, signal = read_wav(f)
        print(f)
        feat = get_feature(fs, signal)
        # Pair the features with every model so each task is self-contained
        f_models = [(feat, model) for model in models]
        pool = ThreadPool(2)
        try:
            predict_result = pool.map(get_score, f_models)
        finally:
            # Release the worker threads even when scoring fails
            pool.close()
            pool.join()
        # Convert the raw scores into probabilities, keeping the labels
        proba = GMMSet.softmax([i[1] for i in predict_result])
        predict_result = [(predict_result[i][0], proba[i])
                          for i in range(len(proba))]
        # Sort the results by probability, highest first
        predict_result = sorted(predict_result,
                                key=operator.itemgetter(1),
                                reverse=True)
        # Label format of the WeChat voice dataset: text before the first '_'
        label = os.path.basename(f).split('_')[0]
        # Label format of the AISHELL dataset:
        # label=os.path.basename(f)[6:11]
        predict = predict_result[0][0]
        predict_score = predict_result[0][1]
        print("Predict ", time.time() - start_time, " seconds")
        # Tally top-1 accuracy
        if label in predict:
            right1 += 1
            print('label:', label, '  predict:', predict, '  score:',
                  predict_score, ' top1 right')
        else:
            wrong1 += 1
            print('label:', label, '  predict:', predict, '  score:',
                  predict_score, ' top1 wrong')
        # Tally top-10 accuracy
        top10 = predict_result[:10]
        predicts = [pre[0] for pre in top10]
        predict_scores = [pre[1] for pre in top10]
        if label in predicts:
            right += 1
            print('label:', label, '  predicts:', predicts, '  scores:',
                  predict_scores, ' top10 Right')
        else:
            wrong += 1
            print('label:', label, '  predicts:', predicts, '  scores:',
                  predict_scores, ' top10 Wrong')
        num += 1
    # Guard against a pattern that matched no audio file at all
    top1_acc = right1 / num if num else 0.0
    top10_acc = right / num if num else 0.0
    print('top1:', num, '  right:', right1, '  wrong:', wrong1, ' top1 acc:',
          top1_acc)
    print('top10:', num, '  right:', right, '  wrong:', wrong, ' top10 acc:',
          top10_acc)