Code example #1
def train_and_dump(dirs, start, end, output_model, features_save):
    m = ModelInterface()
    #print("len(dirs[start:end]):", len(dirs[start:end]))
    for d in dirs[start:end]:
        label = os.path.basename(d.rstrip('/'))
        wavs = glob.glob(d + '/*.wav')

        if len(wavs) == 0:
            print("No wav file found in %s" % (d))
            continue
        for wav in wavs:
            try:
                fs, signal = read_wav(wav)
                m.enroll(label, fs, signal)
                #print("wav %s has been enrolled" % (wav))
            except Exception as e:
                print(wav + " error %s" % (e))
    print("The group wav files has been enrolled")
    # 如果指定了mfcc特征文件保存路径,则保存mfcc特征文件
    if features_save:
        m.mfcc_dump(features_save)
        print(
            "The features of this group wav files has been pickle.dumped to %s"
            % features_save)
    m.train()
    m.dump(output_model)
    print("%s has been pickle.dumped\t" % output_model)
Code example #2
File: main.py Project: riteme/property-prediction
def _cache(data: TextIO, model_name: Text, output: BinaryIO, **kwargs):
    cpu = require_device(prefer_cuda=False)
    model_type = models.select(model_name)
    model = ModelInterface(model_type, cpu, False)

    csv = util.load_csv(data)
    cache = {}
    for smiles in csv.keys():
        cache_key = (smiles, )  # memcached is indexed on argument list
        data = model.process(smiles)
        cache[cache_key] = model.encode_data(data, **kwargs)

    pickle.dump(cache, output)
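A sketch of how _cache might be invoked; the file paths and the 'gin' model name are assumptions, not taken from the project:

# Hypothetical invocation of _cache
with open('data/train.csv') as data, open('encode.cache', 'wb') as output:
    _cache(data, 'gin', output)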
Code example #3
def task_enroll(input_dirs, output_model):
    m = ModelInterface()
    input_dirs = [os.path.expanduser(k) for k in input_dirs.strip().split()]
    dirs = itertools.chain(*(glob.glob(d) for d in input_dirs))
    dirs = [d for d in dirs if os.path.isdir(d)]

    files = []
    if len(dirs) == 0:
        print ("No valid directory found!")
        sys.exit(1)

    for d in dirs:
        label = os.path.basename(d.rstrip('/'))
        wavs = glob.glob(d + '/*.wav')

        if len(wavs) == 0:
            print ("No wav file found in %s"%(d))
            continue
        for wav in wavs:
            try:
                fs, signal = read_wav(wav)
                m.enroll(label, fs, signal)
                print("wav %s has been enrolled"%(wav))
            except Exception as e:
                print(wav + " error %s"%(e))

    m.train()
    m.dump(output_model)
Code example #4
File: gui.py Project: TAzOnee/SpeakerRecognition
 def load_model(self, MainWindow):
     global m
     fileName = QtWidgets.QFileDialog().getOpenFileName(
         MainWindow, "Load Model", "", "Model File (*.out)")
     print(fileName[0])
     self.ln_model.setText(fileName[0])
     m = ModelInterface.load(fileName[0])
Code example #5
def task_predict(input_files, input_model):

    m = ModelInterface.load(input_model)
    if os.path.exists(input_files):
        for f in glob.glob(os.path.expanduser(input_files)):
            fs, signal = read_wav(f)
            label, score = m.predict(fs, signal)
            filepath = "http://sh.illegalfm.com:4881/record/" + os.path.basename(
                input_files)
            with DB(host='47.92.33.19',
                    user='******',
                    passwd='1qazxsw2',
                    db='database_fm') as db:

                # db.execute("INSERT INTO database_fm (id,radio_file_path,sound_markup) VALUES (null,'{}','{}')".format(f,label))
                db.execute(
                    "UPDATE fm_t_scan_record SET sound_markup = '{}' WHERE radio_file_path = '{}'"
                    .format(label, filepath))
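                # NOTE: interpolating label/filepath into the SQL string is
                # injection-prone; a parameterized execute would be safer here,
                # if this DB wrapper supports one.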
            print(filepath, '->', label, ", score->", score)
            os.remove(f)
    else:
        filepath = "http://sh.illegalfm.com:4881/record/" + os.path.basename(
            input_files)
        with DB(host='47.92.33.19',
                user='******',
                passwd='1qazxsw2',
                db='database_fm') as db:
            db.execute(
                "UPDATE fm_t_scan_record SET sound_markup = 'Exception' WHERE radio_file_path = '{}'"
                .format(filepath))
Code example #6
def task_verify(wav_url, person_id):
    start_time = time.time()
    print('Start time:',
          time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time)))
    m = ModelInterface.load(model)

    if person_id not in m.features:
        return 'fail', 'current user not trained', ''

    # Download the audio file to verify
    current_time = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))  # %M (minutes), not %I
    dest_wav = verify_voice_dir + current_time + '_' + person_id + '.wav'
    utils.download_file(wav_url, dest_wav)

    for f in glob.glob(os.path.expanduser(dest_wav)):
        fs, signal = utils.read_wav(f)
        probability = m.verify(fs, signal, person_id)
        print(probability)
        end_time = time.time()
        print('End time:', time.strftime('%Y-%m-%d %H:%M:%S',
                                         time.localtime(end_time)))
        print('Total elapsed:', end_time - start_time)
        if probability > -48:
            print(f, '-> matched:', person_id)
            return 'success', '', 'yes'
        else:
            print(f, '-> no match')
            return 'success', '', 'no'
Code example #7
File: main.py Project: riteme/property-prediction
def build_model(model_info: TModelInfo,
                no_initialize: bool = False) -> ModelInterface:
    model_type, device, kwargs = model_info
    return ModelInterface(model_type,
                          device,
                          no_initialize=no_initialize,
                          **kwargs)
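build_model simply unpacks a (model_type, device, kwargs) triple; a hedged sketch of constructing one, where the 'mlp' model name and the empty kwargs are assumptions:

# Hypothetical TModelInfo triple
model_info = (models.select('mlp'), require_device(prefer_cuda=True), {})
model = build_model(model_info)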
Code example #8
def OrderEnroll():
    m = ModelInterface.load("model.out")
    fs, signal = read_wav("./GUI/TotalRecording/18082020202755.wav")
    m.enroll("18082020202755", fs, signal)
    m.train()
    m.CheckEnroll()
    m.dump("mo1.out")


# def task_predictgui(path, input_model):
#     m = ModelInterface.load(input_model)
#     f=glob.glob(path)
#     fs, signal = read_wav(f[0])
#     label, score = m.predict(fs, signal)
#     return label
#
# if __name__ == "__main__":
#     # global args
#     # args = get_args()
#     #
#     # task = args.task
#     #
#     # if task == 'enroll':
#     #      task_enroll(args.input, args.model)
#     #
#     #
#     # elif task == 'predict':
#     #      task_predict(args.input, args.model)
#     #      task_predict("datatest/*.wav", "model1.out")
# task_enroll("./Train/*","model.out")
#
#      # task_predict("./Test", "model.out")
# Predict_ByFile("./GUI/TotalRecording/18082020202755.wav", "D:/doantotnghiep/Speaker_recognition/model.out")
# OrderEnroll()
Code example #9
def task_predict(input_files, input_model):
    m = ModelInterface.load(input_model)
    fs, signal = read_wav(input_files)
    label, score = m.predict(fs, signal)
    print("label", '->', label, ", score->", score)
    result = [label, score]
    return result
Code example #10
def task_predict(input_files, input_model):
    m = ModelInterface.load(input_model)
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        label, score = m.predict(fs, signal)
        print(f, '->', label, ", score->", score)
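    # NOTE: the return below uses label/score from the last matched file only,
    # and raises NameError if the glob matched nothing.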
    return label, score
Code example #11
def task_predict(input_files, input_model):
    m = ModelInterface.load(input_model)
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        label, score = m.predict(fs, signal)
        g = open("Test_results.txt", "a")
        print(f, '->', label, ", score->", score, file=g)
        g.close()
Code example #12
def task_predict(input_files, input_model):
    m = ModelInterface.load(input_model)
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        label, score = m.predict(fs, signal)

        str = "label = {l2}  score = {l3}".format(l2=label, l3=score)
        return [str, label]
Code example #13
def task_check_status(person_id):

    m = ModelInterface.load(model)

    if person_id not in m.features:
        return 'success', '', 'no'
    else:
        return 'success', '', 'yes'
Code example #14
File: gui.py Project: danenigma/speaker_recoginition
 def load(self):
     fname = QFileDialog.getOpenFileName(self, "Open Data File:", "", "")
     if fname:
         try:
             self.backend = ModelInterface.load(fname)
         except Exception as e:
             self.warn(str(e))
         else:
             self.status("Loaded from file: " + fname)
Code example #17
def evaluate_loss(
    model: ModelInterface,
    batch: Sequence[Any],
    label: torch.Tensor
) -> torch.Tensor:
    # criterion = torch.nn.CrossEntropyLoss(weight=torch.tensor([1.0, 60.0]))
    criterion = torch.nn.CrossEntropyLoss()
    pred = model.forward(batch)
    loss = criterion(pred, label)
    return loss
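A toy check of evaluate_loss with a stand-in model; the two-class logit shape and the dummy batch are assumptions for illustration:

# Hypothetical smoke test for evaluate_loss
import torch

class _DummyModel:
    def forward(self, batch):  # mimics ModelInterface.forward returning logits
        return torch.randn(len(batch), 2, requires_grad=True)

label = torch.tensor([0, 1, 1, 0])
loss = evaluate_loss(_DummyModel(), [None] * 4, label)
loss.backward()  # the loss is differentiable through the logits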
Code example #18
    def __init__(self):

        self.backend = ModelInterface.load(self.INPUT_MODEL)
        try:
            fs, signal = read_wav(self.BG)
            self.backend.init_noise(fs, signal)
        except:
            print "file not found!"
        self.pub = rospy.Publisher('/speaker', String, queue_size=10)
        self.sub = rospy.Subscriber('/wav', numpy_msg(Floats),
                                    self.task_predict)
Code example #19
def Predict_ByFile(file, input_model):
    print("start")
    m = ModelInterface.load(input_model)
    fs, signal = read_wav(file)
    print(fs)
    print(signal)
    label, score = m.predict(fs, signal)
    strPath = os.path.realpath(file)
    y_true = os.path.basename(os.path.dirname(strPath))
    print(label)
    print(score)
    return label
Code example #20
def feature_re_extract():
    #pdb.set_trace()
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S',
                  'FJAZ_S', 'FMEL_S', 'FMEV_S', 'FSLJ_S', 'FTEJ_S',
                  'FUAN_S', 'MASM_S', 'MCBR_S', 'MFKC_S', 'MKBP_S',
                  'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S', 'MOEW_S',
                  'MPRA_S', 'MREM_S', 'MTLS_S']
    m = ModelInterface.load('model/model.out')

    # construct train set
    train_set = []
    for c in test_class:
        for i in m.features[c]:
            train_set.append(i)

    # construct autoencoder
    train_data = T.dmatrix('train_data')
    x = T.dmatrix('x')  # the data is presented as rasterized images
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 10))
    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=39,
        n_hidden=100
    )
    cost, updates = da.get_cost_updates(
        corruption_level=0.,
        learning_rate=0.4
    )
    train_da = theano.function(
        [train_data],
        cost,
        updates=updates,
        givens={
            x: train_data
        }
    )
    # train autoencoder
    training_epochs = 100
    c1 = []
    for epoch in xrange(training_epochs):
        c1.append(train_da(numpy.asarray(train_set)))
        print 'Training epoch %d, cost ' % epoch, c1[-1]

    for c in test_class:
        m.features[c] = da.get_hidden_values(m.features[c]).eval()

    m.train()
    m.dump('model/model_da.out')
    with open('model/da.out', 'wb') as f:  # binary mode for pickle
        pickle.dump(da, f, -1)
Code example #21
def train(train_data_dir, model_path):
    m = ModelInterface()
    files = [f for f in os.listdir(train_data_dir) if re.search(r"\.wav", f)]
    for f in files:
        label, _ = f.split("_")
        file = os.path.join(train_data_dir, f)
        try:
            fs, signal = read_wav(file)
            m.enroll(label, fs, signal)
            logger.info("wav %s has been enrolled" % (file))
        except Exception as e:
            logger.info(file + " error %s" % (e))

    m.train()
    m.dump(model_path)
Code example #22
def evaluate(eval_data_dir, model_path):
    m = ModelInterface.load(model_path)
    files = [f for f in os.listdir(eval_data_dir) if re.search(r"\.wav", f)]
    total, n_correct = 0, 0
    for f in files:
        total += 1
        label, _ = f.split("_")
        file = os.path.join(eval_data_dir, f)
        fs, signal = read_wav(file)
        pred, _ = m.predict(fs, signal)
        logger.info("Input: {}, Output: {}".format(file, pred))
        if label == pred:
            n_correct += 1
    logger.info("Accuracy: {}".format(n_correct / total))
Code example #23
def task_enroll(input_dirs,
                output_model,
                features_save=None,
                group_person_num=None):
    m = ModelInterface()
    # Split the input string of directories into a list
    input_dirs = [os.path.expanduser(k) for k in input_dirs.strip().split()]
    # Chain the glob expansion of every pattern into one iterator
    dirs = itertools.chain(*(glob.glob(d) for d in input_dirs))
    # Keep only the entries that are actual directories
    dirs = [d for d in dirs if os.path.isdir(d)]

    for d in dirs:
        label = os.path.basename(d.rstrip('/'))
        wavs = glob.glob(d + '/*.wav')

        if len(wavs) == 0:
            print("No wav file found in %s" % (d))
            continue
        for wav in wavs:
            try:
                fs, signal = read_wav(wav)
                m.enroll(label, fs, signal)
                #print("wav %s has been enrolled" % (wav))
            except Exception as e:
                print(wav + " error %s" % (e))
    print("The wav files has been enrolled")
    # 如果指定了mfcc特征文件保存路径,则保存mfcc特征文件
    if features_save:
        m.mfcc_dump(features_save)
        print(
            "The features of this group wav files has been pickle.dumped to %s"
            % features_save)
    m.train()
    m.dump(output_model)
    print("%s has been pickle.dumped\t" % output_model)
Code example #24
def task_train_single(wav_url, person_id):

    if os.path.exists(model):
        m = ModelInterface.load(model)
    else:
        m = ModelInterface()

    if person_id in m.features:
        return 'fail', 'already exists'

    # Download the training audio file
    dest_dir = train_voice_dir + person_id
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    current_time = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))  # %M (minutes), not %I
    dest_wav = dest_dir + '/' + current_time + '_' + person_id + '.wav'

    print(wav_url)
    print(dest_wav)
    utils.download_file(wav_url, dest_wav)

    # Collect the downloaded training audio files
    wavs = glob.glob(dest_dir + '/*.wav')

    if len(wavs) == 0:
        return 'fail', 'no wav files under this dir'

    # enroll the downloaded wavs
    for wav in wavs:
        try:
            fs, signal = utils.read_wav(wav)
            m.enroll(person_id, fs, signal)
            print("wav %s has been enrolled" % (wav))
        except Exception as e:
            print(wav + " error %s" % (e))

    m.train_single(person_id)
    m.dump(model)

    return 'success', ''
Code example #25
def task_predict(input_files, input_model):
    # Split the input string of model paths into a list
    input_models = [os.path.expanduser(k) for k in input_model.strip().split()]
    # Chain the glob expansion of every pattern into one iterator
    models = itertools.chain(*(glob.glob(m) for m in input_models))
    # Load every matched model file
    models = [ModelInterface.load(m) for m in models]
    # Accuracy counters
    right = 0
    wrong = 0
    num = 0
    # Extract features from each test file and match it against every model for the top result
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = read_wav(f)
        print(f)
        feat = get_feature(fs, signal)
        predict_result = []
        # Match the file against each model group and collect all scores
        for model in models:
            #print(model)
            #m = ModelInterface.load(model)
            results = model.predict(feat)
            for result in results:
                predict_result.append(result)
        #print("predict_result:",predict_result)
        # Sort predictions by score, descending
        predict_result = sorted(predict_result,
                                key=operator.itemgetter(1),
                                reverse=True)
        #print("sort_predict_result:", predict_result)
        # Label format of the WeChat voice dataset
        label = os.path.basename(f).split('_')[0]  #[6:11]
        #label=os.path.basename(f).split('(')[0]#[6:11]
        # Label format of the AISHELL dataset
        #label=os.path.basename(f)[6:11]
        predict = predict_result[0][0]
        #print('Top:',predict_result[:10])
        # Tally accuracy
        if label in predict:
            right += 1
            print('label:', label, '  predict:', predict, '  right')
        else:
            wrong += 1
            print('label:', label, '  predict:', predict, '  wrong')
        num += 1
    print('All:', num, '  right:', right, '  wrong:', wrong, '  acc:',
          right / num)
Code example #26
def task_predict():
    m = ModelInterface.load('data.bin')
    predict_sound_path = os.path.join(os.getcwd(), 'predictSounds')
    dirs = os.listdir(predict_sound_path)
    wavs = []
    if len(dirs) == 0:
        print('No wav files found')
    else:
        for d in dirs:
            ext = os.path.splitext(d)[-1].lower()
            if ext == '.wav':
                wavs.append(d)
    for w in wavs:
        sample_rate, signal = read_wav(os.path.join(predict_sound_path, w))
        label = os.path.splitext(w)[0]
        label2, score = m.predict(sample_rate, signal)
        print(label, '->', label2, '->', score)
Code example #27
def train():
    m = ModelInterface()
    train_dir = 'data/train/'
    train_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S',
                   'FJAZ_S', 'FMEL_S', 'FMEV_S', 'FSLJ_S', 'FTEJ_S',
                   'FUAN_S', 'MASM_S', 'MCBR_S', 'MFKC_S', 'MKBP_S',
                   'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S', 'MOEW_S',
                   'MPRA_S', 'MREM_S', 'MTLS_S']
    file_name = ['a.wav', 'b.wav', 'c.wav', 'd.wav', 'e.wav', 'f.wav', 'g.wav']
    for c in train_class:
        for n in file_name:
            fs, signal = wavfile.read(train_dir + c + n)
            m.enroll(c, fs, signal)
    m.train()
    m.dump('model/model.out')
Code example #28
def task_predict(input_files, input_model):
    total = 0
    acc = 0
    m = ModelInterface.load(input_model)
    for f in glob.glob(os.path.expanduser(input_files)):
        total += 1
        fs, signal = read_wav(f)
        label = m.predict(fs, signal)
        print(f, '->', label, end='')
        if f.split('/')[-2] == label:
            print("√")
            acc += 1
        else:
            print('×')

    acc = acc * 1.0 / total
    print(acc)
Code example #29
def task_mfcc_train(input_files, output_model):
    # Gather all MFCC feature files into a single dict
    mfcc_dic_all = {}
    for file in glob.glob(os.path.expanduser(input_files)):
        with open(file, 'rb') as f:
            mfcc_dic = pickle.load(f)
            # merge dicts (entries already in mfcc_dic_all win on key collisions)
            mfcc_dic_all = {**mfcc_dic, **mfcc_dic_all}
            #print([k for k in mfcc_dic])
    # Train and dump the model file
    m = ModelInterface()
    m.features = mfcc_dic_all
    m.train()
    m.dump(output_model)
    print("%s has been pickle.dumped\t" % output_model)
Code example #30
File: main.py Project: EZlzh/property-prediction
def train_step(
        model: ModelInterface,
        # `torch.optim.optimizer.Optimizer` is ghost.
        # WHY DOES MYPY NOT RECOGNIZE `torch.optim.Optimizer`?
        optimizer: 'torch.optim.optimizer.Optimizer',
        batch: Sequence[Any],
        label: torch.Tensor) -> float:
    # criterion = torch.nn.CrossEntropyLoss(weight=torch.tensor([1.0, 60.0]))
    criterion = torch.nn.CrossEntropyLoss()
    optimizer.zero_grad()

    pred = model.forward(batch)
    loss = criterion(pred, label)
    loss.backward()
    optimizer.step()

    return loss.item()
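train_step is the inner step of an epoch loop; a hedged sketch of the loop around it, where the Adam optimizer, the batch loader, and a parameters() accessor on ModelInterface are all assumptions:

# Hypothetical training loop around train_step; 'loader' yields (batch, label) pairs.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
for epoch in range(10):
    for batch, label in loader:
        loss = train_step(model, optimizer, batch, label)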
Code example #31
def task_predict(input_files, input_model):
    start_time = time.time()
    print('Start time:',
          time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time)))
    m = ModelInterface.load(input_model)
    for f in glob.glob(os.path.expanduser(input_files)):
        fs, signal = utils.read_wav(f)
        label, probability = m.predict(fs, signal)
        #print(probability)
        if probability > -48:
            print(f, '->', label)
        else:
            print(f, '-> no speaker recognized')
    end_time = time.time()
    print('End time:', time.strftime('%Y-%m-%d %H:%M:%S',
                                     time.localtime(end_time)))
    print('Total elapsed:', end_time - start_time)
Code example #32
def test(up_bound, lower_bound):
    m = ModelInterface.load('model/model_da.out')
    with open('model/da.out', 'rb') as f:  # binary mode for pickle
        da = pickle.load(f)
    count = 0
    test_dir = 'data/test/'
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S',
                  'FJAZ_S', 'FMEL_S', 'FMEV_S', 'FSLJ_S', 'FTEJ_S',
                  'FUAN_S', 'MASM_S', 'MCBR_S', 'MFKC_S', 'MKBP_S',
                  'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S', 'MOEW_S',
                  'MPRA_S', 'MREM_S', 'MTLS_S']
    file_name = ['1.wav', '2.wav']
    for c in test_class:
        for n in file_name:
            fs, signal = wavfile.read(test_dir + c + n)
            if predict(m, fs, signal[:80000], da, up_bound, lower_bound) == c:
                count = count + 1
    print 'accuracy is:', (100.0 * count) / (len(test_class) * len(file_name)), '%'
Code example #33
def evaluate_model(
    model: ModelInterface,
    batch: List[object],
    label: List[int],
    show_stats: bool = False
) -> Tuple[float, float, List[int]]:
    with torch.no_grad():
        pred = model.predict(batch)
        pred_label = pred.argmax(dim=1)
        index = pred_label.to(torch.bool)

        if show_stats:
            stats = torch.cat([
                pred[index].to('cpu'),
                torch.tensor(label, dtype=torch.float).reshape(-1, 1)[index]
            ], dim=1)
            log.debug(stats)

    roc_auc, prc_auc = util.evaluate_auc(label, pred[:, 1])
    return roc_auc, prc_auc, pred_label.tolist()
Code example #34
def feature_re_extract():
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S',
                  'FJAZ_S', 'FMEL_S', 'FMEV_S', 'FSLJ_S', 'FTEJ_S',
                  'FUAN_S', 'MASM_S', 'MCBR_S', 'MFKC_S', 'MKBP_S',
                  'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S', 'MOEW_S',
                  'MPRA_S', 'MREM_S', 'MTLS_S']
    m = ModelInterface.load('model/model.out')

    # construct train set
    train_set = []
    for c in test_class:
        for i in m.features[c]:
            train_set.append(i)

    # put all values into -1~1
    up_bound = []
    lower_bound = []
    for j in xrange(len(train_set[0])):
        up_bound.append(train_set[0][j])
        lower_bound.append(train_set[0][j])
    for i in xrange(len(train_set)):
        for j in xrange(len(train_set[0])):
            up_bound[j] = max(up_bound[j], train_set[i][j])
            lower_bound[j] = min(lower_bound[j], train_set[i][j])
    for i in xrange(len(train_set)):
        for j in xrange(len(train_set[0])):
            train_set[i][j] = 2 * ((train_set[i][j] - lower_bound[j]) / (up_bound[j] - lower_bound[j])) - 1

    # construct stacked autoencoder
    sda = mSdA(
        layers=[39, 100]
    )
    sda.setMinMax(up_bound, lower_bound)
    sda.train(train_set, 500)  # use 500 as the batch size
    for c in test_class:
        m.features[c] = sda.get_hidden_values(m.features[c])
    m.train()
    m.dump('model/model_sda.out')
    sda.dump('model/sda.out')
Code example #35
def task_predict(path, input_model):
    m = ModelInterface.load(input_model)
    files = []
    total, true = 0, 0  # file / correct-prediction counters
    # r=root, d=directories, f=files
    for r, d, f in os.walk(path):
        for file in f:
            if '.wav' in file:
                files.append(os.path.join(r, file))
    for f in files:
        total += 1
        fs, signal = read_wav(f)
        label, score = m.predict(fs, signal)
        strPath = os.path.realpath(f)
        y_true = os.path.basename(os.path.dirname(strPath))
        if (label == y_true):
            true += 1
        print(f, '->', label, ", score->", score)
    print('Correctly predicted files:', true)
    print('Total files:', total)
    print('accuracy: ', true / total * 100, '%')
Code example #36
def test():
    m = ModelInterface.load('model/model_sda.out')
    sda = mSdA.load('model/sda.out')
    count = 0
    allsum = 0
    test_dir = 'data/test/'
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S',
                  'FJAZ_S', 'FMEL_S', 'FMEV_S', 'FSLJ_S', 'FTEJ_S',
                  'FUAN_S', 'MASM_S', 'MCBR_S', 'MFKC_S', 'MKBP_S',
                  'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S', 'MOEW_S',
                  'MPRA_S', 'MREM_S', 'MTLS_S']
    file_name = ['1.wav', '2.wav']
    for c in test_class:
        for n in file_name:
            fs, signal = wavfile.read(test_dir + c + n)
            signal_size = 40000
            for indx in xrange(len(signal) / signal_size):
                allsum = allsum + 1
                if predict(m, fs, signal[indx * signal_size:(indx + 1) * signal_size], sda) == c:
                    count = count + 1
    print 'accuracy is:', (100.0 * count) / allsum, '%'
Code example #37
def task_realtime_predict(input_model):
    print('start')
    m = ModelInterface.load(input_model)

    # set recording parameter
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    INTERVAL = 1
    INITLEN = 2

    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print("* recording")

    frames = []

    # fulfill the frame
    for i in range(0, int(RATE / CHUNK * INITLEN)):
        data = np.frombuffer(stream.read(CHUNK), dtype=np.int16).tolist()
        frames.append(data)

    while True:
        for i in range(0, int(RATE / CHUNK * INTERVAL)):
            # append the newest time-window chunk
            frames.append(
                np.frombuffer(stream.read(CHUNK), dtype=np.int16).tolist())
            # drop the oldest time-window chunk
            frames.pop(0)

        framesjoin = utils.flat_array(frames)
        framesjoin = np.array(framesjoin)
        label, probability = m.predict(16000, framesjoin)
        print('Current speaker ->', label)
Code example #39
File: gui.py Project: danenigma/speaker_recoginition
class Main(QMainWindow):
    CONV_INTERVAL = 0.4
    CONV_DURATION = 1.5
    CONV_FILTER_DURATION = CONV_DURATION
    FS = 8000
    TEST_DURATION = 3

    def __init__(self, parent=None):
        QWidget.__init__(self, parent)
        uic.loadUi("edytor.ui", self)
        self.statusBar()

        self.timer = QTimer(self)
        self.timer.timeout.connect(self.timer_callback)

        self.noiseButton.clicked.connect(self.noise_clicked)
        self.recording_noise = False
        self.loadNoise.clicked.connect(self.load_noise)

        self.enrollRecord.clicked.connect(self.start_enroll_record)
        self.stopEnrollRecord.clicked.connect(self.stop_enroll_record)
        self.enrollFile.clicked.connect(self.enroll_file)
        self.enroll.clicked.connect(self.do_enroll)
        self.startTrain.clicked.connect(self.start_train)
        self.dumpBtn.clicked.connect(self.dump)
        self.loadBtn.clicked.connect(self.load)

        self.recoRecord.clicked.connect(self.start_reco_record)
        self.stopRecoRecord.clicked.connect(self.stop_reco_record)
#        self.newReco.clicked.connect(self.new_reco)
        self.recoFile.clicked.connect(self.reco_file)
        self.recoInputFiles.clicked.connect(self.reco_files)

        #UI.init
        self.userdata =[]
        self.loadUsers()
        self.Userchooser.currentIndexChanged.connect(self.showUserInfo)
        self.ClearInfo.clicked.connect(self.clearUserInfo)
        self.UpdateInfo.clicked.connect(self.updateUserInfo)
        self.UploadImage.clicked.connect(self.upload_avatar)
        #movie test
        self.movie = QMovie(u"image/recording.gif")
        self.movie.start()
        self.movie.stop()
        self.Animation.setMovie(self.movie)
        self.Animation_2.setMovie(self.movie)
        self.Animation_3.setMovie(self.movie)

        self.aladingpic = QPixmap(u"image/a_hello.png")
        self.Alading.setPixmap(self.aladingpic)
        self.Alading_conv.setPixmap(self.aladingpic)

        #default user image setting
        self.avatarname = "image/nouser.jpg"
        self.defaultimage = QPixmap(self.avatarname)
        self.Userimage.setPixmap(self.defaultimage)
        self.recoUserImage.setPixmap(self.defaultimage)
        self.convUserImage.setPixmap(self.defaultimage)
        self.load_avatar('avatar/')

        # Graph Window init
        self.graphwindow = GraphWindow()
        self.newname = ""
        self.lastname = ""
        self.Graph_button.clicked.connect(self.graphwindow.show)
        self.convRecord.clicked.connect(self.start_conv_record)
        self.convStop.clicked.connect(self.stop_conv)

        self.backend = ModelInterface()

        # debug
        QShortcut(QKeySequence("Ctrl+P"), self, self.printDebug)

        #init
        try:
            fs, signal = read_wav("bg.wav")
            self.backend.init_noise(fs, signal)
        except:
            pass


    ############ RECORD
    def start_record(self):
        self.pyaudio = pyaudio.PyAudio()
        self.status("Recording...")
        self.movie.start()
        self.Alading.setPixmap(QPixmap(u"image/a_thinking.png"))


        self.recordData = []
        self.stream = self.pyaudio.open(format=FORMAT, channels=1, rate=Main.FS,
                        input=True, frames_per_buffer=1)
        self.stopped = False
        self.reco_th = RecorderThread(self)
        self.reco_th.start()

        self.timer.start(1000)
        self.record_time = 0
        self.update_all_timer()

    def add_record_data(self, i):
        self.recordData.append(i)
        return self.stopped

    def timer_callback(self):
        self.record_time += 1
        self.status("Recording..." + time_str(self.record_time))
        self.update_all_timer()

    def stop_record(self):
        self.movie.stop()
        self.stopped = True
        self.reco_th.wait()
        self.timer.stop()
        self.stream.stop_stream()
        self.stream.close()
        self.pyaudio.terminate()
        self.status("Record stopeed")

    ############## conversation
    def start_conv_record(self):
        self.conv_result_list = []
        self.start_record()
        self.conv_now_pos = 0
        self.conv_timer = QTimer(self)
        self.conv_timer.timeout.connect(self.do_conversation)
        self.conv_timer.start(Main.CONV_INTERVAL * 1000)
        #reset
        self.graphwindow.wid.reset()

    def stop_conv(self):
        self.stop_record()
        self.conv_timer.stop()

    def do_conversation(self):
        interval_len = int(Main.CONV_INTERVAL * Main.FS)
        segment_len = int(Main.CONV_DURATION * Main.FS)
        self.conv_now_pos += interval_len
        to_filter = self.recordData[max([self.conv_now_pos - segment_len, 0]):
                                   self.conv_now_pos]
        signal = np.array(to_filter, dtype=NPDtype)
        label = None
        try:
            signal = self.backend.filter(Main.FS, signal)
            if len(signal) > 50:
                label = self.backend.predict(Main.FS, signal, True)
        except Exception as e:
            print traceback.format_exc()
            print str(e)

        global last_label_to_show
        label_to_show = label
        if label and self.conv_result_list:
            last_label = self.conv_result_list[-1]
            if last_label and last_label != label:
                label_to_show = last_label_to_show
        self.conv_result_list.append(label)

        print label_to_show, "label to show"
        last_label_to_show = label_to_show

        #ADD FOR GRAPH
        if label_to_show is None:
            label_to_show = 'Nobody'
        if len(NAMELIST) and NAMELIST[-1] != label_to_show:
            NAMELIST.append(label_to_show)
        self.convUsername.setText(label_to_show)
        self.Alading_conv.setPixmap(QPixmap(u"image/a_result.png"))
        self.convUserImage.setPixmap(self.get_avatar(label_to_show))


    ###### RECOGNIZE
    def start_reco_record(self):
        self.Alading.setPixmap(QPixmap(u"image/a_hello.png"))
        self.recoRecordData = np.array((), dtype=NPDtype)
        self.start_record()

    def stop_reco_record(self):
        self.stop_record()
        signal = np.array(self.recordData, dtype=NPDtype)
        self.reco_remove_update(Main.FS, signal)

    def reco_do_predict(self, fs, signal):
        label = self.backend.predict(fs, signal)
        if not label:
            label = "Nobody"
        print label
        self.recoUsername.setText(label)
        self.Alading.setPixmap(QPixmap(u"image/a_result.png"))
        self.recoUserImage.setPixmap(self.get_avatar(label))

        # TODO To Delete
        write_wav('reco.wav', fs, signal)

    def reco_remove_update(self, fs, signal):
        new_signal = self.backend.filter(fs, signal)
        print "After removed: {0} -> {1}".format(len(signal), len(new_signal))
        self.recoRecordData = np.concatenate((self.recoRecordData, new_signal))
        real_len = float(len(self.recoRecordData)) / Main.FS / Main.TEST_DURATION * 100
        if real_len > 100:
            real_len = 100

        self.reco_do_predict(fs, self.recoRecordData)


    def reco_file(self):
        fname = QFileDialog.getOpenFileName(self, "Open Wav File", "", "Files (*.wav)")
        print 'reco_file'
        if not fname:
            return
        self.status(fname)

        fs, signal = read_wav(fname)
        self.reco_do_predict(fs, signal)

    def reco_files(self):
        fnames = QFileDialog.getOpenFileNames(self, "Select Wav Files", "", "Files (*.wav)")
        print 'reco_files'
        for f in fnames:
            fs, sig = read_wav(f)
            newsig = self.backend.filter(fs, sig)
            label = self.backend.predict(fs, newsig)
            print f, label

    ########## ENROLL
    def start_enroll_record(self):
        self.enrollWav = None
        self.enrollFileName.setText("")
        self.start_record()

    def enroll_file(self):
        fname = QFileDialog.getOpenFileName(self, "Open Wav File", "", "Files (*.wav)")
        if not fname:
            return
        self.status(fname)
        self.enrollFileName.setText(fname)
        fs, signal = read_wav(fname)
        signal = monophonic(signal)
        self.enrollWav = (fs, signal)

    def stop_enroll_record(self):
        self.stop_record()
        print self.recordData[:300]
        signal = np.array(self.recordData, dtype=NPDtype)
        self.enrollWav = (Main.FS, signal)

        # TODO To Delete
        write_wav('enroll.wav', *self.enrollWav)

    def do_enroll(self):
        name = self.Username.text().trimmed()
        if not name:
            self.warn("Please Input Your Name")
            return
#        self.addUserInfo()
        new_signal = self.backend.filter(*self.enrollWav)
        print "After removed: {0} -> {1}".format(len(self.enrollWav[1]), len(new_signal))
        print "Enroll: {:.4f} seconds".format(float(len(new_signal)) / Main.FS)
        if len(new_signal) == 0:
            print "Error! Input is silent! Please enroll again"
            return
        self.backend.enroll(name, Main.FS, new_signal)

    def start_train(self):
        self.status("Training...")
        self.backend.train()
        self.status("Training Done.")

    ####### UI related
    def getWidget(self, splash):
        t = QtCore.QElapsedTimer()
        t.start()
        while (t.elapsed() < 800):
            str = QtCore.QString("times = ") + QtCore.QString.number(t.elapsed())
            splash.showMessage(str)
            QtCore.QCoreApplication.processEvents()

    def upload_avatar(self):
        fname = QFileDialog.getOpenFileName(self, "Open JPG File", "", "File (*.jpg)")
        if not fname:
            return
        self.avatarname = fname
        self.Userimage.setPixmap(QPixmap(fname))

    def loadUsers(self):
        with open("avatar/metainfo.txt") as db:
            for line in db:
                tmp = line.split()
                self.userdata.append(tmp)
                self.Userchooser.addItem(tmp[0])

    def showUserInfo(self):
        for user in self.userdata:
            if self.userdata.index(user) == self.Userchooser.currentIndex() - 1:
                self.Username.setText(user[0])
                self.Userage.setValue(int(user[1]))
                if user[2] == 'F':
                    self.Usersex.setCurrentIndex(1)
                else:
                    self.Usersex.setCurrentIndex(0)
                self.Userimage.setPixmap(self.get_avatar(user[0]))

    def updateUserInfo(self):
        userindex = self.Userchooser.currentIndex() - 1
        u = self.userdata[userindex]
        u[0] = unicode(self.Username.displayText())
        u[1] = self.Userage.value()
        if self.Usersex.currentIndex():
            u[2] = 'F'
        else:
            u[2] = 'M'
        with open("avatar/metainfo.txt","w") as db:
            for user in self.userdata:
                for i in range(3):
                    db.write(str(user[i]) + " ")
                db.write("\n")

    def writeuserdata(self):
        with open("avatar/metainfo.txt","w") as db:
            for user in self.userdata:
                for i in range (0,4):
                    db.write(str(user[i]) + " ")
                db.write("\n")

    def clearUserInfo(self):
        self.Username.setText("")
        self.Userage.setValue(0)
        self.Usersex.setCurrentIndex(0)
        self.Userimage.setPixmap(self.defaultimage)

    def addUserInfo(self):
        for user in self.userdata:
            if user[0] == unicode(self.Username.displayText()):
                return
        newuser = []
        newuser.append(unicode(self.Username.displayText()))
        newuser.append(self.Userage.value())
        if self.Usersex.currentIndex():
            newuser.append('F')
        else:
            newuser.append('M')
        if self.avatarname:
            shutil.copy(self.avatarname, 'avatar/' + newuser[0] + '.jpg')
        self.userdata.append(newuser)
        self.writeuserdata()
        self.Userchooser.addItem(unicode(self.Username.displayText()))


    ############# UTILS
    def warn(self, s):
        QMessageBox.warning(self, "Warning", s)

    def status(self, s=""):
        self.statusBar().showMessage(s)

    def update_all_timer(self):
        s = time_str(self.record_time)
        self.enrollTime.setText(s)
        self.recoTime.setText(s)
        self.convTime.setText(s)

    def dump(self):
        fname = QFileDialog.getSaveFileName(self, "Save Data to:", "", "")
        if fname:
            try:
                self.backend.dump(fname)
            except Exception as e:
                self.warn(str(e))
            else:
                self.status("Dumped to file: " + fname)

    def load(self):
        fname = QFileDialog.getOpenFileName(self, "Open Data File:", "", "")
        if fname:
            try:
                self.backend = ModelInterface.load(fname)
            except Exception as e:
                self.warn(str(e))
            else:
                self.status("Loaded from file: " + fname)

    def noise_clicked(self):
        self.recording_noise = not self.recording_noise
        if self.recording_noise:
            self.noiseButton.setText('Stop Recording Noise')
            self.start_record()
        else:
            self.noiseButton.setText('Recording Background Noise')
            self.stop_record()
            signal = np.array(self.recordData, dtype=NPDtype)
            wavfile.write("bg.wav", Main.FS, signal)
            self.backend.init_noise(Main.FS, signal)

    def load_noise(self):
        fname = QFileDialog.getOpenFileName(self, "Open Data File:", "", "Wav File  (*.wav)")
        if fname:
            fs, signal = read_wav(fname)
            self.backend.init_noise(fs, signal)

    def load_avatar(self, dirname):
        self.avatars = {}
        for f in glob.glob(dirname + '/*.jpg'):
            name = os.path.basename(f).split('.')[0]
            print f, name
            self.avatars[name] = QPixmap(f)

    def get_avatar(self, username):
        p = self.avatars.get(str(username), None)
        if p:
            return p
        else:
            return self.defaultimage

    def printDebug(self):
        for name, feat in self.backend.features.iteritems():
            print name, len(feat)
        print "GMMs",
        print len(self.backend.gmmset.gmms)
Code example #40
def feature_re_extract():
    #pdb.set_trace()
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S',
                  'FJAZ_S', 'FMEL_S', 'FMEV_S', 'FSLJ_S', 'FTEJ_S',
                  'FUAN_S', 'MASM_S', 'MCBR_S', 'MFKC_S', 'MKBP_S',
                  'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S', 'MOEW_S',
                  'MPRA_S', 'MREM_S', 'MTLS_S']
    m = ModelInterface.load('model/model.out')

    # construct train set
    train_set = []
    up_bound = []
    lower_bound = []
    for c in test_class:
        for i in m.features[c]:
            train_set.append(i)
    '''
    # put all values into -1~1 (disabled in this variant)
    for j in xrange(len(train_set[0])):
        up_bound.append(train_set[0][j])
        lower_bound.append(train_set[0][j])

    for i in xrange(len(train_set)):
        for j in xrange(len(train_set[0])):
            up_bound[j] = max(up_bound[j], train_set[i][j])
            lower_bound[j] = min(lower_bound[j], train_set[i][j])

    for i in xrange(len(train_set)):
        for j in xrange(len(train_set[0])):
            train_set[i][j] = 2 * ((train_set[i][j] - lower_bound[j]) / (up_bound[j] - lower_bound[j])) - 1
    '''
    # construct first autoencoder
    train_data = T.dmatrix('train_data')
    x = T.dmatrix('x')  # the data is presented as rasterized images
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 10))
    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=39,
        n_hidden=30
    )
    cost, updates = da.get_cost_updates(
        corruption_level=0.,
        learning_rate=0.4
    )
    train_da = theano.function(
        [train_data],
        cost,
        updates=updates,
        givens={
            x: train_data
        }
    )
    # train first autoencoder
    training_epochs = 20
    c1 = []
    for epoch in xrange(training_epochs):
        c1.append(train_da(numpy.asarray(train_set)))
        print 'Training 1st ae epoch %d, cost ' % epoch, c1[-1]

    # train second autoencoder on the first one's hidden representation
    train_set2 = da.get_hidden_values(train_set).eval()

    train_data = T.dmatrix('train_data')
    x = T.dmatrix('x')
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 10))
    da2 = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=30,
        n_hidden=20
    )
    cost, updates = da2.get_cost_updates(
        corruption_level=0.,
        learning_rate=0.4
    )
    train_da2 = theano.function(
        [train_data],
        cost,
        updates=updates,
        givens={
            x: train_data
        }
    )
    training_epochs = 20
    c1 = []
    for epoch in xrange(training_epochs):
        c1.append(train_da2(numpy.asarray(train_set2)))
        print 'Training 2nd ae epoch %d, cost ' % epoch, c1[-1]

    for c in test_class:
        m.features[c] = da2.get_hidden_values(da.get_hidden_values(m.features[c]).eval()).eval()

    m.train()
    m.dump('model/model_sda.out')
    with open('model/da1.out', 'wb') as f:  # binary mode for pickle
        pickle.dump(da, f, -1)
    with open('model/da2.out', 'wb') as f:
        pickle.dump(da2, f, -1)
    return up_bound, lower_bound