Example #1
File: test.py Project: IMLHF/DASS
def wave_restore_use_data_manager_test():
    data_mixed = mixed_aishell.read_data_sets(
        '/mnt/d/tf_recipe/PIT_SYS/utterance_test/speaker_set')
    Y = np.array(data_mixed.train.Y[:10])

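    # Undo the data manager's normalization: Y appears to store
    # (log10(magnitude + 0.5) + 3) / 8, so this maps back to linear magnitude.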
    Y = (10**(Y * 8 - 3)) - 0.5
    mPhase = np.tile(np.array(data_mixed.train.X_Theta[:10]), 2)
    complex_Y = Y * np.exp(mPhase * 1j)
    complex_Y = np.array(np.split(complex_Y, 2, axis=-1))

    for i in range(10):
        # reY1 = utils.spectrum_tool.librosa_istft((waveMagSpec1*np.exp(np.array(data_mixed.train.X_Theta[0])*1j)).T,512,256)
        reY1 = utils.spectrum_tool.librosa_istft(complex_Y[0][i].T, 512, 256)
        reY2 = utils.spectrum_tool.librosa_istft(complex_Y[1][i].T, 512, 256)
        tmpY = np.concatenate([reY1, reY2])
        wavefile = wave.open(
            '/mnt/d/tf_recipe/PIT_SYS/utterance_test/rewave' + str(i) + '.wav',
            'wb')
        nchannels = 1
        sampwidth = 2  # sample width in bytes; 2 means 16-bit samples
        framerate = 16000
        nframes = len(tmpY)
        comptype = "NONE"
        compname = "not compressed"
        wavefile.setparams(
            (nchannels, sampwidth, framerate, nframes, comptype, compname))
        wavefile.writeframes(np.array(tmpY, dtype=np.int16).tobytes())
        wavefile.close()  # finalize the WAV header and flush to disk
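
A minimal, self-contained version of the wave-module writing pattern used above (mono, 16-bit, 16 kHz); the helper name is mine, and it assumes the samples are already at int16 scale:

import wave

import numpy as np


def write_wav_int16(path, samples, framerate=16000):
    """Write a 1-D array of int16-scale samples as a mono 16-bit WAV."""
    wavefile = wave.open(path, 'wb')
    # (nchannels, sampwidth, framerate, nframes, comptype, compname)
    wavefile.setparams((1, 2, framerate, len(samples), 'NONE',
                        'not compressed'))
    wavefile.writeframes(np.asarray(samples, dtype=np.int16).tobytes())
    wavefile.close()  # close() finalizes the header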
Example #2
def pit_test_for_load():
    batch_size = 32
    # data_dir = '/home/student/work/pit_test/data_small'
    data_dir = '/mnt/d/tf_recipe/PIT_SYS/utterance_test/speaker_set'
    data_mixed = mixed_aishell.read_data_sets(data_dir)
    data = data_mixed.test_cc.X_Y
    x_len = len(data)
    total_batch = (x_len + batch_size - 1) // batch_size  # ceiling division
    mse_list = []
    for i in range(total_batch):
        s_site = i * batch_size
        e_site = min(s_site + batch_size, x_len)
        x_y = data[s_site:e_site]
        x = x_y[0]
        y = x_y[1]
        print(np.shape(x))
        y_out = load_model_test(x, y)
        print(y_out)
        print(np.shape(y_out))
        # mse1 = np.mean((y-y_out)**2, (1, 2))
        # y_out_speaker1, y_out_speaker2 = np.split(y_out, 2, axis=-1)
        # y_out_swaped = np.concatenate([y_out_speaker2, y_out_speaker1], axis=-1)
        # mse2 = np.mean((y-y_out_swaped)**2, (1, 2))
        # loss = np.where(mse1 < mse2, mse1, mse2)
        # mse = np.mean(loss)
        # print('Batch %04d MSE : %lf' % (i+1, mse))
        # mse_list.append(mse)
    # NOTE: mse_list stays empty while the PIT block above is commented out.
    print('Test Average MSE : %lf' % np.mean(mse_list))
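
The commented-out block above is the utterance-level PIT loss: compute the MSE for both speaker orderings and keep the smaller one per utterance. A standalone numpy sketch of that logic (the function name is mine):

import numpy as np


def pit_mse(y, y_out):
    # y, y_out: (batch, time, 2 * feat); the last axis stacks the two speakers.
    mse1 = np.mean((y - y_out) ** 2, axis=(1, 2))
    spk1, spk2 = np.split(y_out, 2, axis=-1)
    y_out_swapped = np.concatenate([spk2, spk1], axis=-1)
    mse2 = np.mean((y - y_out_swapped) ** 2, axis=(1, 2))
    # Keep whichever speaker assignment gives the lower per-utterance error.
    return np.mean(np.where(mse1 < mse2, mse1, mse2))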
Example #3
def run_framePIT():
    data_dir = '/home/student/work/pit_test/data_small'
    # data_dir = '/mnt/d/tf_recipe/PIT_SYS/utterance_test/speaker_set'
    data_mixed = mixed_aishell.read_data_sets(data_dir)

    pit_model = DEEP_SPEECH_SEPARTION(layers_size=[257, 2048, 2048, 2048, 514],
                                      times_width=[7, 1, 1, 1],
                                      loss_fun=loss.frame_PIT_MSE_for_CNN,
                                      learning_rate=0.01,
                                      gpu_list=[1],
                                      name='framePIT')
    pit_model.train(data_mixed.train.X_Y, batch_size=128, epoch=6)
    del pit_model
Example #4
def run_CONV():
    data_dir = '/home/student/work/pit_test/data_small'
    # data_dir = '/mnt/d/tf_recipe/PIT_SYS/utterance_test/speaker_set'
    data_mixed = mixed_aishell.read_data_sets(data_dir)
    conv_model = DEEP_SPEECH_SEPARTION(
        layers_size=[257, 2048, 2048, 2048, 514],
        times_width=[7, 1, 1, 1],
        loss_fun=loss.MSE,
        learning_rate=0.01,
        gpu_list=[1],
        name='CONV')
    conv_model.train(data_mixed.train.X_Y, batch_size=128, epoch=6)
    conv_model.test_PIT(data_mixed.test_cc.X_Y, batch_size=128)
    conv_model.save_model()
Example #5
def separate_speech():
    # data_dir = '/home/student/work/pit_test/data_small'
    data_dir = '/mnt/d/tf_recipe/PIT_SYS/utterance_test/speaker_set'
    data_mixed = mixed_aishell.read_data_sets(data_dir)
    mixX = np.array(data_mixed.train.X[:10])
    picture_spec((mixX * 8 - 3), 'exp/mixSpeech')
    raw_Y = np.array(data_mixed.train.Y[:10])
    picture_spec((raw_Y * 8 - 3), 'exp/rawCLEAN')
    raw = np.array(data_mixed.train.X[:10], dtype=np.float32)
    pre = load_model_test(raw, None)
    pre = (pre * 8 - 3)
    picture_spec(pre, 'exp/restorePIT')
    for i in range(10):
        np.savetxt('exp/restorePIT.num' + str(i), pre[i])
        np.savetxt('exp/restorePIT.numT' + str(i), pre[i].T)
Example #6
File: test.py Project: IMLHF/DASS
def data_manager_test():
    data_mixed = mixed_aishell.read_data_sets(
        '/mnt/d/tf_recipe/PIT_SYS/utterance_test/speaker_set')
    print(data_mixed.train.X_Y[0:2])
    print(np.shape(data_mixed.train.X_Y[0:2]))
    print(np.shape(data_mixed.train.Y[512:512 + 128]))
Example #7
def train():

    data_dir = '/home/student/work/pit_test/data_small'
    # data_dir = '/mnt/d/tf_recipe/PIT_SYS/utterance_test/speaker_set'
    data_mixed = mixed_aishell.read_data_sets(data_dir)

    g = tf.Graph()
    with g.as_default():
        with tf.name_scope('model'):
            # with tf.variable_scope('lstm_var',reuse=tf.AUTO_REUSE):
            # tr_model and val_model should share variables
            tr_model = LSTM(FLAGS)
            tf.get_variable_scope().reuse_variables()
            val_model = LSTM(FLAGS)
        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())
        # Prevent exhausting all the gpu memories.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        # config.gpu_options.per_process_gpu_memory_fraction = 0.5
        config.allow_soft_placement = True
        # sess = tf.InteractiveSession(config=config)
        sess = tf.Session(config=config)
        sess.run(init)
        if FLAGS.resume_training.lower() == 'true':
            ckpt = tf.train.get_checkpoint_state(FLAGS.save_dir + '/nnet')
            if ckpt and ckpt.model_checkpoint_path:
                tf.logging.info("restore from" + ckpt.model_checkpoint_path)
                tr_model.saver.restore(sess, ckpt.model_checkpoint_path)
                best_path = ckpt.model_checkpoint_path
            else:
                tf.logging.fatal("checkpoint not found")
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # g.finalize()

        try:
            # Validation before training.
            loss_prev = eval_one_epoch(sess, coord, val_model,
                                       data_mixed.test_cc.X_Y)
            tf.logging.info("CROSSVAL PRERUN AVG.LOSS %.4f" % loss_prev)

            sess.run(tf.assign(tr_model.lr, FLAGS.learning_rate))
            for epoch in range(FLAGS.start_epoch, FLAGS.max_epochs):
                start_time = time.time()

                # Training
                train_X_Y = data_mixed.train.X_Y
                tr_loss = train_one_epoch(sess, coord, tr_model, train_X_Y)

                # Validation
                val_X_Y = data_mixed.test_cc.X_Y
                val_loss = eval_one_epoch(sess, coord, val_model, val_X_Y)

                end_time = time.time()
                # Determine checkpoint path
                ckpt_name = "nnet_iter%d_lrate%e_trloss%.4f_cvloss%.4f" % (
                    epoch + 1, FLAGS.learning_rate, tr_loss, val_loss)
                ckpt_dir = FLAGS.save_dir + '/nnet'
                if not os.path.exists(ckpt_dir):
                    os.makedirs(ckpt_dir)
                ckpt_path = os.path.join(ckpt_dir, ckpt_name)
                # Relative improvement between previous and current val_loss
                rel_impr = np.abs(loss_prev - val_loss) / loss_prev
                # Accept or reject new parameters
                if val_loss < loss_prev:
                    tr_model.saver.save(sess, ckpt_path)
                    # Logging train loss along with validation loss
                    loss_prev = val_loss
                    best_path = ckpt_path
                    tf.logging.info(
                        "ITERATION %03d: TRAIN AVG.LOSS %.4f, (lrate%e) CROSSVAL"
                        " AVG.LOSS %.4f, %s (%s), TIME USED: %.2fs" %
                        (epoch + 1, tr_loss, FLAGS.learning_rate, val_loss,
                         "nnet accepted", ckpt_name,
                         end_time - start_time))
                else:
                    tr_model.saver.restore(sess, best_path)
                    tf.logging.info(
                        "ITERATION %03d: TRAIN AVG.LOSS %.4f, (lrate%e) CROSSVAL"
                        " AVG.LOSS %.4f, %s, (%s), TIME USED: %.2fs" %
                        (epoch + 1, tr_loss, FLAGS.learning_rate, val_loss,
                         "nnet rejected", ckpt_name,
                         end_time - start_time))

                # Start halving when improvement is low
                if rel_impr < FLAGS.start_halving_impr:
                    FLAGS.learning_rate *= FLAGS.halving_factor
                    sess.run(tf.assign(tr_model.lr, FLAGS.learning_rate))

                # Stopping criterion
                if rel_impr < FLAGS.end_halving_impr:
                    if epoch < FLAGS.min_epochs:
                        tf.logging.info(
                            "we were supposed to finish, but we continue as "
                            "min_epochs : %s" % FLAGS.min_epochs)
                        continue
                    else:
                        tf.logging.info(
                            "finished, too small rel. improvement %g" %
                            rel_impr)
                        break
        except Exception as e:
            coord.request_stop(e)
        finally:
            coord.request_stop()
            # Wait for threads to finish.
            coord.join(threads)

        tf.logging.info("Done training")
        sess.close()
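
The accept/reject and halving logic above resembles a Kaldi-style "newbob" schedule: keep a checkpoint only when validation loss improves, halve the learning rate once the relative improvement falls below start_halving_impr, and stop once it falls below end_halving_impr (subject to min_epochs). A plain-Python sketch of just that control flow, with default thresholds invented for illustration:

def newbob_step(loss_prev, val_loss, lr,
                start_halving_impr=0.01, end_halving_impr=0.001,
                halving_factor=0.5):
    # One epoch of the schedule: returns (accepted, new_lr, should_stop).
    rel_impr = abs(loss_prev - val_loss) / loss_prev
    accepted = val_loss < loss_prev  # accept new weights only on improvement
    if rel_impr < start_halving_impr:
        lr *= halving_factor
    return accepted, lr, rel_impr < end_halving_impr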
Example #8
def decode():
    """Decoding the inputs using current model."""

    # data_dir = '/home/student/work/pit_test/data_small'
    # data_dir = '/mnt/d/tf_recipe/PIT_SYS/utterance_test/speaker_set'
    data_dir = '/mnt/d/tf_recipe/ALL_DATA/aishell/mixed_data_small'
    data_mixed = mixed_aishell.read_data_sets(data_dir)

    with tf.Graph().as_default():
        with tf.name_scope('model'):
            model = LSTM(FLAGS, infer=True)

        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())

        sess = tf.Session()

        sess.run(init)

        ckpt = tf.train.get_checkpoint_state(FLAGS.save_dir + '/nnet')
        if ckpt and ckpt.model_checkpoint_path:
            tf.logging.info("Restore from " + ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            tf.logging.fatal("checkpoint not found.")
            sys.exit(-1)

    speech_num = 10

    speech_start = 100000  # same gender
    # speech_start = 100123  # different gender
    # speech_start = 202810  # different gender, similar norm
    dataset = data_mixed.train

    # speech_start = 3128  # test_cc
    # dataset = data_mixed.test_cc

    X_Y_batch = dataset.X_Y[speech_start:speech_start + speech_num]
    angle_batch = np.array(dataset.X_Theta[speech_start:speech_start +
                                           speech_num])
    x_batch = X_Y_batch[0]
    y_batch = X_Y_batch[1]
    lengths = np.array([np.shape(x_batch)[1]] * np.shape(x_batch)[0])
    cleaned1, cleaned2 = sess.run([model.cleaned1, model.cleaned2],
                                  feed_dict={
                                      model.inputs: x_batch,
                                      model.labels: y_batch,
                                      model.lengths: lengths,
                                  })

    raw_spec1, raw_spec2 = np.split(y_batch, 2, axis=-1)

    cleaned1 = np.array(mixed_aishell.rmNormalization(cleaned1))
    cleaned2 = np.array(mixed_aishell.rmNormalization(cleaned2))
    raw_spec1 = np.array(mixed_aishell.rmNormalization(raw_spec1))
    raw_spec2 = np.array(mixed_aishell.rmNormalization(raw_spec2))
    mixed_spec2 = np.array(mixed_aishell.rmNormalization(x_batch))

    decode_ans_dir = os.path.join(FLAGS.save_dir, 'decode_ans')
    if os.path.exists(decode_ans_dir):
        shutil.rmtree(decode_ans_dir)
    os.makedirs(decode_ans_dir)

    if FLAGS.decode_show_spec:
        cleaned = np.concatenate([cleaned1, cleaned2], axis=-1)
        raw_spec = np.concatenate([raw_spec1, raw_spec2], axis=-1)
        utils.spectrum_tool.picture_spec(np.log10(cleaned + 0.001),
                                         decode_ans_dir + '/restore_spec_')
        utils.spectrum_tool.picture_spec(np.log10(raw_spec + 0.001),
                                         decode_ans_dir + '/raw_spec_')

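    # Reattach the mixture phase (X_Theta) to the estimated and reference
    # magnitudes so the ISTFTs below can produce waveforms.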
    spec1 = cleaned1 * np.exp(angle_batch * 1j)
    spec2 = cleaned2 * np.exp(angle_batch * 1j)
    raw_spec1 = raw_spec1 * np.exp(angle_batch * 1j)
    raw_spec2 = raw_spec2 * np.exp(angle_batch * 1j)
    mixed_spec2 = mixed_spec2 * np.exp(angle_batch * 1j)

    for i in range(speech_num):
        # write restore wave
        reY1 = utils.spectrum_tool.librosa_istft(spec1[i].T,
                                                 (FLAGS.input_size - 1) * 2,
                                                 FLAGS.input_size - 1)
        reY2 = utils.spectrum_tool.librosa_istft(spec2[i].T,
                                                 (FLAGS.input_size - 1) * 2,
                                                 FLAGS.input_size - 1)
        reCONY = np.concatenate([reY1, reY2])
        wavefile = wave.open(decode_ans_dir + ('/restore_audio_%03d.wav' % i),
                             'wb')
        nchannels = 1
        sampwidth = 2  # sample width in bytes; 2 means 16-bit samples
        framerate = 16000
        nframes = len(reCONY)
        comptype = "NONE"
        compname = "not compressed"
        wavefile.setparams(
            (nchannels, sampwidth, framerate, nframes, comptype, compname))
        wavefile.writeframes(np.array(reCONY, dtype=np.int16).tobytes())
        wavefile.close()

        # write raw wave
        rawY1 = utils.spectrum_tool.librosa_istft(raw_spec1[i].T,
                                                  (FLAGS.input_size - 1) * 2,
                                                  FLAGS.input_size - 1)
        rawY2 = utils.spectrum_tool.librosa_istft(raw_spec2[i].T,
                                                  (FLAGS.input_size - 1) * 2,
                                                  FLAGS.input_size - 1)
        rawCONY = np.concatenate([rawY1, rawY2])
        wavefile = wave.open(decode_ans_dir + ('/raw_audio_%03d.wav' % i),
                             'wb')
        nframes = len(rawCONY)
        wavefile.setparams(
            (nchannels, sampwidth, framerate, nframes, comptype, compname))
        wavefile.writeframes(np.array(rawCONY, dtype=np.int16).tobytes())
        wavefile.close()

        # write mixed wave
        mixedWave = utils.spectrum_tool.librosa_istft(
            mixed_spec2[i].T, (FLAGS.input_size - 1) * 2, FLAGS.input_size - 1)
        wavefile = wave.open(decode_ans_dir + ('/mixed_audio_%03d.wav' % i),
                             'wb')
        nframes = len(mixedWave)
        wavefile.setparams(
            (nchannels, sampwidth, framerate, nframes, comptype, compname))
        wavefile.writeframes(np.array(mixedWave, dtype=np.int16).tobytes())
        wavefile.close()

        # wave picture
        utils.spectrum_tool.picture_wave(
            reCONY, decode_ans_dir + ('/restore_wav_%03d' % i), 16000)
        utils.spectrum_tool.picture_wave(
            rawCONY, decode_ans_dir + ('/raw_wav_%03d' % i), 16000)

    tf.logging.info("Done decoding.")
    sess.close()
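
Throughout decode(), the ISTFT parameters are derived from the spectrum size: an n_fft-point STFT has n_fft // 2 + 1 frequency bins, so input_size = 257 bins implies n_fft = (257 - 1) * 2 = 512, with half of that as the hop. A quick check of the relation (the n_bins value here is just for illustration):

n_bins = 257              # FLAGS.input_size: frequency bins per frame
n_fft = (n_bins - 1) * 2  # 512-point FFT
hop = n_fft // 2          # 256-sample hop, i.e. 50% frame overlap
assert n_fft // 2 + 1 == n_bins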