Example #1
import tensorflow as tf


def train():
    lr = 0.002
    c = ClassificationModel()
    cost, normVector, alfas_mean = c.buildModel()

    # The global step is fed in through feed_dict, so keep it non-trainable.
    gStep = tf.Variable(0, trainable=False)
    # Staircase decay: lr * 0.9 ** floor(step / 300).
    learning_rate = tf.train.exponential_decay(lr, gStep, 300, 0.9, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    # Clip gradients by global norm to keep training stable.
    gradients, variables = zip(*optimizer.compute_gradients(cost))
    gradients, _ = tf.clip_by_global_norm(gradients, 100)
    train_op = optimizer.apply_gradients(zip(gradients, variables))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # To resume from a checkpoint, restore selected variables instead:
        # saver = tf.train.Saver(var_list=restore_vars)
        # saver.restore(sess, 'step/model.ckpt-'+ckpoint)
        saver = tf.train.Saver()

        for step in range(10000):
            mels, ids = c.getBatch_data_label(10)
            feed_dict = {c.melInputs: mels, c.labelInputs: ids, gStep: step}
            _, _lr, _loss, _alfas_mean = sess.run(
                [train_op, learning_rate, cost, alfas_mean], feed_dict=feed_dict)
            print('{}: lr = {:.6f}, loss = {}, alfas_mean = {}'.format(
                step, _lr, _loss, _alfas_mean))

            # Checkpoint every 300 steps (skip step 0).
            if step % 300 == 0 and step != 0:
                saver.save(sess, 'step/model.ckpt', global_step=step)
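Both examples depend on a ClassificationModel class that is not shown. The stub below is a hypothetical sketch of the interface the snippets assume (melInputs, labelInputs, buildModel, getBatch_data_label); the input shapes, the 10-speaker output size, and the stand-in network are assumptions, not the original model.

import numpy as np
import tensorflow as tf


class ClassificationModel(object):
    """Hypothetical stub matching the interface used above; not the real model."""

    def buildModel(self, is_training=True):
        # (batch, frames, n_mfcc) MFCC inputs and integer speaker labels.
        self.melInputs = tf.placeholder(tf.float32, [None, 160, 40])
        self.labelInputs = tf.placeholder(tf.int32, [None])
        # Stand-in network: mean-pool over time, then one projection layer.
        pooled = tf.reduce_mean(self.melInputs, axis=1)
        embedding = tf.layers.dense(pooled, 128)
        normVector = tf.nn.l2_normalize(embedding, axis=1)  # unit-norm embeddings
        logits = tf.layers.dense(embedding, 10)             # assumed 10 speakers
        cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.labelInputs, logits=logits))
        alfas_mean = tf.reduce_mean(tf.nn.softmax(logits))  # placeholder statistic
        return cost, normVector, alfas_mean

    def getBatch_data_label(self, batch_size):
        # Random stand-in batch; the real code would read features from disk.
        mels = np.random.randn(batch_size, 160, 40).astype(np.float32)
        ids = np.random.randint(0, 10, size=batch_size)
        return mels, ids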
Example #2
import os

import librosa
import numpy as np
import tensorflow as tf


def test():
    c = ClassificationModel()
    _, normVector, _ = c.buildModel(is_training=False)

    # Restore everything except temporary variables created only for training.
    restore_vars = []
    for var in tf.global_variables():
        if 'temp' not in var.name:
            restore_vars.append(var)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(var_list=restore_vars)
        saver.restore(sess, 'step/model.ckpt-2700')

        speaker_path = '/home/logview/workspace/projects/TI-SV/samples'
        utterance_specs = []
        files = os.listdir(speaker_path)
        files.sort()
        for utter_name in files:
            utter_path = os.path.join(speaker_path, utter_name)          # path of each utterance
            utter, sr = librosa.core.load(utter_path, sr=16000)          # load utterance audio
            utter_trim, index = librosa.effects.trim(utter, top_db=20)   # simple VAD: trim leading/trailing silence

            # 40-dim MFCCs, transposed to (frames, 40); keep at most the first
            # 160 frames. Note: utterances shorter than 160 frames would make
            # the batch ragged, so np.array below assumes equal lengths.
            S = librosa.feature.mfcc(y=utter_trim, sr=sr, n_mfcc=40)
            inputs = S.transpose((1, 0))[:160]
            print(inputs.shape)
            utterance_specs.append(inputs)

        utterance_specs = np.array(utterance_specs)
        print(utterance_specs.shape)

        vectors = sess.run(normVector, feed_dict={c.melInputs: utterance_specs})
        similar(vectors)
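The similar helper is not defined in the snippet. A minimal sketch of what it presumably does (hypothetical): since normVector produces L2-normalized embeddings, the dot product of two rows is their cosine similarity, so printing the pairwise similarity matrix is enough to compare speakers.

import numpy as np


def similar(vectors):
    # vectors: (N, D) L2-normalized embeddings; row dot products are
    # cosine similarities, so sim[i, j] compares utterance i with j.
    sim = np.dot(vectors, vectors.T)
    np.set_printoptions(precision=3, suppress=True)
    print(sim)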