def visualize(model, test_file, train_desc_file):
    """ Get the prediction using the model, and visualize softmax outputs
    Params:
        model (keras.models.Model): Trained speech model
        test_file (str): Path to an audio clip
        train_desc_file(str): Path to the training file used to train this
                              model
    """
    datagen = DataGenerator()
    datagen.load_train_data(train_desc_file)
    datagen.fit_train(100)

    print("Compiling test function...")
    test_fn = compile_output_fn(model)

    inputs = [datagen.featurize(test_file)]

    prediction = np.squeeze(test_fn([inputs, True]))
    softmax_file = "{}.softmax.npy".format(test_file)
    softmax_img_file = "{}.softmax.png".format(test_file)
    print("Prediction: {}".format(argmax_decode(prediction)))
    print("Saving network output to: {}".format(softmax_file))
    print("As image: {}".format(softmax_img_file))
    np.save(softmax_file, prediction)
    sm = softmax(prediction.T)
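    # Select and reorder the class rows (index 1 is skipped) so they line up
    # with the y-tick labels, column_labels[::-1], set below.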
    sm = np.vstack((sm[0], sm[2], sm[3:][::-1]))
    fig, ax = plt.subplots()
    ax.pcolor(sm, cmap=plt.cm.Greys_r)
    column_labels = [chr(i) for i in range(97, 97 + 26)] + ['space', 'blank']
    ax.set_yticks(np.arange(sm.shape[0]) + 0.5, minor=False)
    ax.set_yticklabels(column_labels[::-1], minor=False)
    plt.savefig(softmax_img_file)
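# Usage sketch for visualize() (hypothetical paths; the model can be obtained
# e.g. with load_model(...) as done in interactive_vis() further below):
#
#   model = load_model('saved_model_dir', 'weights.h5')
#   visualize(model, 'samples/test_clip.wav', 'train_corpus.json')
#
# This prints the greedy decode and writes the raw network output next to the
# input as <test_file>.softmax.npy and <test_file>.softmax.png.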
# Example #2
def validation(model, val_fn, decode_fn, datagen, mb_size=64):
    """ Validation routine for speech-models
    Params:
        model (keras.model): Constructed keras model
        val_fn (theano.function): A theano function that calculates the cost
            over a validation set
        decode_fn: Decoding function applied to the network output
        datagen (DataGenerator)
        mb_size (int): Size of each minibatch
    Returns:
        val_cost (float): Average validation cost over the whole validation set
        val_acc (float): Average per-utterance accuracy (1 - normalized edit
            distance) over the validation set
    """
    avg_cost = 0.0
    avg_acc = 0.0
    i = 0
    for batch in datagen.iterate_validation(mb_size):
        inputs = batch['x']
        labels = batch['y']
        input_lengths = batch['input_lengths']
        label_lengths = batch['label_lengths']
        texts = batch['texts']
        # print('labels:'+str(labels))
        # Due to convolution, the number of timesteps of the output
        # is different from the input length. Calculate the resulting
        # timesteps
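        # (Illustrative note: for a 'valid'-mode convolution with context width C
        #  and stride S, the usual relation is out_len = (in_len - C) // S + 1;
        #  ctc_input_length is the helper that derives this from the model itself.)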
        ctc_input_lens = ctc_input_length(model, input_lengths)
        # print('ctc_input_lens_pre:'+str(ctc_input_lens))
        prediction, ctc_cost = val_fn(
            [inputs, ctc_input_lens, labels, label_lengths, True])
        # print(labels)
        # prediction = np.swapaxes(prediction, 0, 1)
        predict_str = argmax_decode(prediction, decode_fn, ctc_input_lens)

        # print('predict_str:'+str(predict_str))
        avg_cost += ctc_cost.mean()
        print('predict_str:' + str(predict_str))
        print('texts:' + str(texts))
        acc_sum = 0
        for index, text in enumerate(texts):
            sm = edit_distance.SequenceMatcher(a=text, b=predict_str[index])
            # Cast to float so the normalized edit distance is not truncated
            # by integer division under Python 2.
            acc = 1.0 - float(sm.distance()) / len(text)
            acc_sum = acc_sum + acc
        avg_acc += acc_sum / len(texts)
        i += 1
    if i == 0:
        return 0.0, 0.0
    return avg_cost / i, avg_acc / i
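# Illustrative sketch of the accuracy metric used in validation() and test():
# per-utterance accuracy = 1 - edit_distance / reference_length, using the
# `edit_distance` package these routines already rely on.
def _accuracy_metric_demo():
    truth, hyp = "hello world", "helo world"
    sm = edit_distance.SequenceMatcher(a=truth, b=hyp)
    # One deletion against an 11-character reference: 1 - 1/11 ~= 0.909
    return 1.0 - float(sm.distance()) / len(truth)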
def test(model, test_fn, decode_fn, datagen, mb_size=16, conv_context=11,
         conv_border_mode='valid', conv_stride=2):
    """ Testing routine for speech-models
    Params:
        model (keras.model): Constructed keras model
        test_fn (theano.function): A theano function that calculates the cost
            over a test set
        decode_fn: Decoding function applied to the network output
        datagen (DataGenerator)
        mb_size (int): Size of each minibatch
        conv_context (int): Convolution context
        conv_border_mode (str): Convolution border mode
        conv_stride (int): Convolution stride
    Returns:
        str: Empty string; per-utterance accuracies and the overall mean
            accuracy are printed instead
    """
    avg_cost = 0.0
    i = 0
    acc_list = []
    for batch in datagen.iterate_test(mb_size):
        inputs = batch['x']
        labels = batch['y']
        input_lengths = batch['input_lengths']
        label_lengths = batch['label_lengths']
        ground_truth = batch['texts']
        # Due to convolution, the number of timesteps of the output
        # is different from the input length. Calculate the resulting
        # timesteps
        # output_lengths = [conv_output_length(l, conv_context,
        #                                      conv_border_mode, conv_stride)
        #                   for l in input_lengths]
        ctc_input_lens = ctc_input_length(model, input_lengths)
        prediction, ctc_cost = test_fn([inputs, ctc_input_lens, labels,
                                        label_lengths, True])
        # predictions = np.swapaxes(predictions, 0, 1)
        prediction_str = argmax_decode(prediction, decode_fn, ctc_input_lens)
        for j, prediction in enumerate(prediction_str):
            truth = ground_truth[j]
            sm = edit_distance.SequenceMatcher(a=truth, b=prediction)
            acc = 1.0 - float(sm.distance()) / len(truth)
            acc_list.append(acc)
            print("Truth: {}, Prediction: {}, acc: {}".format(truth, prediction, acc))
    print(acc_list)
    print('avg_acc:' + str(np.array(acc_list).mean()))
    return ''
# Example #4
def test(model,
         test_fn,
         datagen,
         mb_size=16,
         conv_context=11,
         conv_border_mode='valid',
         conv_stride=2):
    """ Testing routine for speech-models
    Params:
        model (keras.model): Constructed keras model
        test_fn (theano.function): A theano function that calculates the cost
            over a test set
        datagen (DataGenerator)
        mb_size (int): Size of each minibatch
        conv_context (int): Convolution context
        conv_border_mode (str): Convolution border mode
        conv_stride (int): Convolution stride
    Returns:
        test_cost (float): Average test cost over the whole test set
    """
    avg_cost = 0.0
    i = 0
    for batch in datagen.iterate_test(mb_size):
        inputs = batch['x']
        labels = batch['y']
        input_lengths = batch['input_lengths']
        label_lengths = batch['label_lengths']
        ground_truth = batch['texts']
        # Due to convolution, the number of timesteps of the output
        # is different from the input length. Calculate the resulting
        # timesteps
        output_lengths = [
            conv_output_length(l, conv_context, conv_border_mode, conv_stride)
            for l in input_lengths
        ]
        predictions, ctc_cost = test_fn(
            [inputs, output_lengths, labels, label_lengths, True])
        predictions = np.swapaxes(predictions, 0, 1)
        for j, prediction in enumerate(predictions):
            print("Truth: {}, Prediction: {}".format(
                ground_truth[j], argmax_decode(prediction)))
        avg_cost += ctc_cost
        i += 1
    return avg_cost / i
def interactive_vis(model_dir, train_desc_file, weights_file=None):
    """ Get the prediction using the model, and visualize softmax outputs, able
    to predict multiple inputs.
    Params:
        model_dir (str): Trained speech model or None. If None given will ask
            code to make model.
        train_desc_file(str): Path to the training file used to train this
                              model
        weights_file(str): Path to stored weights file for model being made
    """

    datagen = DataGenerator()
    datagen.load_train_data(train_desc_file)
    datagen.fit_train(100)

    if model_dir is None:
        assert weights_file is not None
        print("""Make and store new model into model, e.g.
               >>> model_wrp = HalfPhonemeModelWrapper()
               >>> model = model_wrp.compile(nodes=1000, recur_layers=5,
                                             conv_context=5)
               """)

        model = prompt_loop('[model=]> ', locals())['model']
        model.load_weights(weights_file)
    else:
        model = load_model(model_dir, weights_file)

    print("""Make and store test function to test_fn, e.g.
           >>> test_fn = model_wrp.compile_output_fn()
           """)
    test_fn = prompt_loop('[test_fn=]> ', locals())['test_fn']

    while True:
        try:
            test_file = raw_input('Input file: ')
        except EOFError:
            comm_mode = True
            while comm_mode:
                try:
                    comm = raw_input("[w: load weights\t s: shell ] > ")
                    if comm.strip() == 'w':
                        w_path = raw_input("weights file path: ").strip()
                        model.load_weights(w_path)
                    if comm.strip() == 's':
                        prompt_loop('> ', locals())
                except EOFError:
                    comm_mode = False
                except Exception as exc:
                    print(exc)
            continue

        if test_file.strip() == '':
            break

        try:
            inputs = [datagen.featurize(test_file)]
        except Exception as exc:
            print(exc)
            continue

        prediction = np.squeeze(test_fn([inputs, True]))

        softmax_file = "{}.softmax.npy".format(test_file)
        softmax_img_file = "{}.softmax.png".format(test_file)
        print("Prediction: {}".format(argmax_decode(prediction)))
        print("Saving network output to: {}".format(softmax_file))
        print("As image: {}".format(softmax_img_file))
        np.save(softmax_file, prediction)
        sm = softmax(prediction.T)
        sm = np.vstack((sm[0], sm[2], sm[3:][::-1]))
        fig, ax = plt.subplots()
        ax.pcolor(sm, cmap=plt.cm.Greys_r)
        column_labels = [chr(i)
                         for i in range(97, 97 + 26)] + ['space', 'blank']
        ax.set_yticks(np.arange(sm.shape[0]) + 0.5, minor=False)
        ax.set_yticklabels(column_labels[::-1], minor=False)
        plt.savefig(softmax_img_file)
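# Usage sketch for interactive_vis() (hypothetical paths):
#
#   interactive_vis('saved_model_dir', 'train_corpus.json')
#   # or build the model interactively from stored weights only:
#   interactive_vis(None, 'train_corpus.json', weights_file='weights.h5')
#
# At the "Input file:" prompt, enter an audio path to visualize it, press
# Ctrl-D (EOF) to reach the weights/shell menu, or enter an empty line to quit.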
# Example #6
def test(model,
         test_fn,
         datagen,
         result_file,
         mb_size=16,
         conv_context=11,
         conv_border_mode='valid',
         conv_stride=2):
    # def test(model, test_fn, datagen, result_file, mb_size=16):
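    """ Testing routine for speech-models with language-model rescoring.
    For each utterance, the greedy (argmax) decode and the best of the top-3
    prefix-beam-search candidates are compared against the reference text;
    per-utterance results are written to `result_file` as JSON lines, and the
    overall normalized edit distance is printed at the end.
    """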

    total_distance = 0
    total_length = 0
    wf = open(result_file, 'w')
    for batch in datagen.iterate_test(mb_size):
        inputs = batch['x']
        labels = batch['y']
        input_lengths = batch['input_lengths']
        label_lengths = batch['label_lengths']
        ground_truth = batch['texts']

        output_lengths = [
            conv_output_length(l, conv_context, conv_border_mode, conv_stride)
            for l in input_lengths
        ]
        predictions, ctc_cost = test_fn(
            [inputs, output_lengths, labels, label_lengths, True])

        # ctc_in_length = ctc_input_length(model, input_lengths)
        # predictions, ctc_cost = test_fn([inputs, ctc_in_length, labels,
        #                                 label_lengths, False])
        predictions = np.swapaxes(predictions, 0, 1)
        for i, prediction in enumerate(predictions):
            truth = ground_truth[i]
            # Best single result (greedy argmax decode)
            pre_prediction = argmax_decode(prediction)
            # Top-3 results (prefix beam search with the language model)
            preds = prefix_beam_search(lm_model,
                                       matrix_same_delete(prediction), 100, 3)

            max_pred_precision = []
            for pred in preds:
                max_pred_precision.append(pred[1])
            # Find the maximum probability among the three candidates
            max_index = max_pred_precision.index(max(max_pred_precision))
            # Get the string with the highest probability among the three
            best_pred_str = preds[max_index][0]
            # Compute the edit distance between the label and the highest-probability string
            sm = edit_distance.SequenceMatcher(a=truth, b=best_pred_str)
            sm2 = edit_distance.SequenceMatcher(a=truth, b=pre_prediction)
            total_distance += sm.distance()
            total_length += len(truth)
            content = {}
            content['label'] = truth
            content['text'] = best_pred_str
            content['lm_distance'] = float(sm.distance()) / len(truth)
            content['no_lm'] = pre_prediction
            content['no_lm_distance'] = float(sm2.distance()) / len(truth)
            __write_and_print(wf, json.dumps(content, ensure_ascii=False))

    total_distance_rate = -1 if total_length == 0 else float(
        total_distance) / total_length
    print('total_distance_rate:%s' % total_distance_rate)
    wf.close()
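# Note on the metric printed above: total_distance_rate is an overall error
# rate, i.e. the sum of edit distances divided by the total reference length.
# For example, two references of lengths 10 and 20 with edit distances 2 and 3
# give (2 + 3) / (10 + 20) = 5 / 30 ~= 0.167.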