Example #1
import os
import pickle

from keras.models import load_model  #or tensorflow.keras.models, depending on the setup

#vad_import, mfcc_array and vad_array are project-local helpers defined elsewhere in this module
def main():
    _, dic = vad_import()
    items = []
    for key, _ in dic.items():
        items.append(key)  #vad order
    directory = '/export/b13/jlai/scale/vad/open_sat/vad_nist/jeff_vast_dev_3'
    if not os.path.exists(directory):
        os.makedirs(directory)

    #load model
    print('Loading model...')
    model = load_model('LSTM_model_1.h5')

    #Evaluate
    print('Evaluating...')
    batch_size = 1
    input_data = mfcc_array()
    vad_predict = model.predict(input_data, batch_size=batch_size)
    #vad_predict = model.predict_generator(mfcc_gen(), steps=math.ceil(25083/batch_size)) #return numpy array
    vad_original = vad_array()  #numpy array
    print("vad_predict.shape is %s" % (vad_predict.shape, ))
    print("vad_original.shape is %s" % (vad_original.shape, ))
    assert vad_predict.shape == vad_original.shape, "vad shape not same"

    #Write both arrays to a single pickle file; they must be read back in this order
    with open('vad2nist_3', 'wb') as f:
        pickle.dump(vad_predict, f)
        pickle.dump(vad_original, f)
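
main() pickles two arrays into the same file back to back; hmm_to_file() and eval() below depend on reading them back in that same order (eval() loads both, hmm_to_file() discards the first load). A minimal sketch of that pattern, using a hypothetical toy file name:

import pickle

a, b = [1, 2, 3], [4, 5, 6]
with open('toy.pkl', 'wb') as f:   #hypothetical file, stands in for 'vad2nist_3'
    pickle.dump(a, f)              #first object written ...
    pickle.dump(b, f)
with open('toy.pkl', 'rb') as f:
    first = pickle.load(f)         #... is the first object read back
    second = pickle.load(f)
assert first == a and second == b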
Example #2
import pickle

import numpy as np

#hmm, vad_import and index are project-local helpers defined elsewhere in this module
def hmm_to_file():
    with open('vad2nist_3', 'rb') as f:
        _ = pickle.load(f)  #skip vad_predict; only vad_original is needed here
        vad_original = pickle.load(f)

    hmm_vad = hmm()
    _, dic = vad_import()
    items = []
    for key, _ in dic.items():
        items.append(key)  #vad order
    directory = '/export/b13/jlai/scale/vad/open_sat/vad_nist/jeff_vast_dev_5'

    #masking
    neg_count = 0
    for i in np.arange(0, vad_original.shape[0]):
        for j in np.arange(0, vad_original.shape[1]):
            for k in np.arange(0, vad_original.shape[2]):
                if vad_original[i][j][k] == -1:
                    neg_count += 1
                    hmm_vad[i][j][k] = -1
    print("number of -1 in vad_original is %d" % neg_count)

    #subsample --> sample --> utterance --> whole dataset
    vad_index = index()
    #per5: sub-samples collected for the current utterance, count: utterance index, utt: sub-sample buffer
    per5, count, utt = 0, 0, []
    for sub_sample in hmm_vad:
        utt.append(sub_sample)
        per5 += 1
        if per5 == vad_index[count]:  #end of an utterance
            utt = np.concatenate(utt, axis=0)
            with open(directory + '/' + items[count] + '.txt', 'w') as f:
                accu, diff = 0., 0.01
                if utt[0] == 1:
                    prev_tag = 'speech'
                if utt[0] == 0:
                    prev_tag = 'non-speech'
                for vad_temp in utt[1:]:
                    if vad_temp == 1:
                        current_tag = 'speech'
                    if vad_temp == 0:
                        current_tag = 'non-speech'
                    if vad_temp == -1:  #masked frame: flush the current segment and stop
                        f.write("X\tX\tX\tSAD\t%s\t%.2f\t%.2f\t%s\t1.00\n" %
                                (items[count], accu, accu + diff, prev_tag))
                        break
                    if current_tag == prev_tag:
                        diff += 0.01  #10 ms
                    else:
                        f.write("X\tX\tX\tSAD\t%s\t%.2f\t%.2f\t%s\t1.00\n" %
                                (items[count], accu, accu + diff, prev_tag))
                        prev_tag = current_tag
                        accu = accu + diff
                        diff = 0.01
            per5 = 0
            count += 1
            utt = []
Example #3
import pickle

import numpy as np

#vad_import and index are project-local helpers defined elsewhere in this module
#Note: this function name shadows the built-in eval()
def eval():
    with open('vad2nist_3', 'rb') as f:
        vad_predict = pickle.load(f)
        vad_original = pickle.load(f)

    #Convert vad_predict from probabilities to hard 0/1 labels, keeping -1 where vad_original is masked
    threshold = 0.50
    r = np.logical_or(vad_original == 0, vad_original == 1)
    result = np.copy(vad_predict)
    result[r] = (vad_predict[r] > threshold)
    result[vad_original == -1] = -1
    vad_predict = result

    _, dic = vad_import()
    items = []
    for key, _ in dic.items():
        items.append(key)  #vad order
    directory = '/export/b13/jlai/scale/vad/open_sat/vad_nist/jeff_vast_dev_4'

    #subsample --> sample --> utterance --> whole dataset
    vad_index = index()
    #per5: sub-samples collected for the current utterance, count: utterance index, utt: sub-sample buffer
    per5, count, utt = 0, 0, []
    for sub_sample in vad_predict:
        utt.append(sub_sample)
        per5 += 1
        if per5 == vad_index[count]:  #end of an utterance
            utt = np.concatenate(utt, axis=0)
            with open(directory + '/' + items[count] + '.txt', 'w') as f:
                accu, diff = 0., 0.01
                if utt[0] == 1:
                    prev_tag = 'speech'
                if utt[0] == 0:
                    prev_tag = 'non-speech'
                for vad_temp in utt[1:]:
                    if vad_temp == 1:
                        current_tag = 'speech'
                    if vad_temp == 0:
                        current_tag = 'non-speech'
                    if vad_temp == -1:  #masked frame: flush the current segment and stop
                        f.write("X\tX\tX\tSAD\t%s\t%.2f\t%.2f\t%s\t1.00\n" %
                                (items[count], accu, accu + diff, prev_tag))
                        break
                    if current_tag == prev_tag:
                        diff += 0.01  #10 ms
                    else:
                        f.write("X\tX\tX\tSAD\t%s\t%.2f\t%.2f\t%s\t1.00\n" %
                                (items[count], accu, accu + diff, prev_tag))
                        prev_tag = current_tag
                        accu = accu + diff
                        diff = 0.01
            per5 = 0
            count += 1
            utt = []
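
The masking step in eval() above can be seen in isolation on a toy pair of arrays (a minimal sketch; the values and shapes are made up, the 0.50 threshold matches the example): frames labelled 0 or 1 in vad_original get a hard thresholded decision, while frames labelled -1 stay masked.

import numpy as np

vad_original = np.array([0., 1., 1., -1., 0.])     #reference labels, -1 = masked
vad_predict = np.array([0.2, 0.8, 0.4, 0.9, 0.6])  #model output probabilities

threshold = 0.50
r = np.logical_or(vad_original == 0, vad_original == 1)
result = np.copy(vad_predict)
result[r] = (vad_predict[r] > threshold)   #hard 0/1 decision on valid frames
result[vad_original == -1] = -1            #keep masked frames at -1
#result is now [0., 1., 0., -1., 1.]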
Example #4
import numpy as np

#vad_import and get_frame are project-local helpers defined elsewhere in this module
def nist():
    """
    Return the vad dictionary, with each entry length-aligned to its mfcc frames.
    """
    _, vad_dic = vad_import()
    mfcc_dic = get_frame()
    #Data Processing
    for key in vad_dic.keys():
        diff = len(vad_dic[key]) - len(mfcc_dic[key])
        if diff > 0:  #vad has more frames --> truncate vad
            temp = vad_dic[key]
            temp = temp[:len(mfcc_dic[key])]
            assert len(temp) == len(mfcc_dic[key])
            vad_dic[key] = temp
        else:  #mfcc has more frames --> pad vad with 0
            temp = vad_dic[key]
            for _ in np.arange(np.abs(diff)):
                temp.append([0])
            assert len(temp) == len(mfcc_dic[key])
            vad_dic[key] = temp
        assert len(vad_dic[key]) == len(mfcc_dic[key])

    return vad_dic
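
The #Data Processing loop above aligns each VAD entry to the number of MFCC frames by either truncating the VAD labels or padding them with [0] (non-speech). A minimal sketch of that alignment on made-up lists (the 13-dimensional MFCC frames are only an assumption for illustration):

import numpy as np

vad = [[1], [1], [0]]                       #3 frames of VAD labels
mfcc = [np.zeros(13) for _ in range(5)]     #5 frames of MFCCs

diff = len(vad) - len(mfcc)
if diff > 0:          #vad has more frames --> truncate vad
    vad = vad[:len(mfcc)]
else:                 #mfcc has more frames --> pad vad with [0]
    for _ in np.arange(np.abs(diff)):
        vad.append([0])
assert len(vad) == len(mfcc)
#vad is now [[1], [1], [0], [0], [0]]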
Example #5
def combine_mfcc_vad():
	_, vad_dic = vad_import()
	mfcc_dic = get_frame()
	#Data Processing 
	for key in vad_dic.keys(): 
		diff = len(vad_dic[key])-len(mfcc_dic[key])