Example #1
0
        def run_test(init_op, dataset):
            wav_filenames = []
            losses = []
            predictions = []
            ground_truths = []

            bar = create_progressbar(prefix='Test epoch | ',
                                     widgets=['Steps: ', progressbar.Counter(), ' | ', progressbar.Timer()]).start()
            log_progress('Test epoch...')

            step_count = 0

            # Initialize iterator to the appropriate dataset
            session.run(init_op)

            # First pass, compute losses and transposed logits for decoding
            while True:
                try:
                    batch_wav_filenames, batch_logits, batch_loss, batch_lengths, batch_transcripts = \
                        session.run([batch_wav_filename, transposed, loss, batch_x_len, batch_y])
                except tf.errors.OutOfRangeError:
                    break

                decoded = ctc_beam_search_decoder_batch(batch_logits, batch_lengths, Config.alphabet, FLAGS.beam_width,
                                                        num_processes=num_processes, scorer=scorer,
                                                        cutoff_prob=FLAGS.cutoff_prob, cutoff_top_n=FLAGS.cutoff_top_n)
                predictions.extend(d[0][1] for d in decoded)
                ground_truths.extend(sparse_tensor_value_to_texts(batch_transcripts, Config.alphabet))
                wav_filenames.extend(wav_filename.decode('UTF-8') for wav_filename in batch_wav_filenames)
                losses.extend(batch_loss)

                step_count += 1
                bar.update(step_count)

            bar.finish()

            wer, cer, samples = calculate_report(wav_filenames, ground_truths, predictions, losses)
            mean_loss = np.mean(losses)

            # Take only the first report_count items
            report_samples = itertools.islice(samples, FLAGS.report_count)

            print('Test on %s - WER: %f, CER: %f, loss: %f' %
                  (dataset, wer, cer, mean_loss))
            print('-' * 80)
            for sample in report_samples:
                print('WER: %f, CER: %f, loss: %f' %
                      (sample.wer, sample.cer, sample.loss))
                print(' - wav: file://%s' % sample.wav_filename)
                print(' - src: "%s"' % sample.src)
                print(' - res: "%s"' % sample.res)
                print('-' * 80)

            return samples
Example #2
0
def main(args, _):
    manager = Manager()
    work_todo = JoinableQueue()   # this is where we are going to store input data
    work_done = manager.Queue()  # this where we are gonna push them out

    processes = []
    for i in range(args.proc):
        worker_process = Process(target=tflite_worker, args=(args.model, args.lm, args.trie, work_todo, work_done, i), daemon=True, name='tflite_process_{}'.format(i))
        worker_process.start()        # Launch reader() as a separate python process
        processes.append(worker_process)

    print([x.name for x in processes])

    wavlist = []
    ground_truths = []
    predictions = []
    losses = []
    wav_filenames = []

    with open(args.csv, 'r') as csvfile:
        csvreader = csv.DictReader(csvfile)
        count = 0
        for row in csvreader:
            count += 1
            # Relative paths are relative to the folder the CSV file is in
            if not os.path.isabs(row['wav_filename']):
                row['wav_filename'] = os.path.join(os.path.dirname(args.csv), row['wav_filename'])
            work_todo.put({'filename': row['wav_filename'], 'transcript': row['transcript']})
            wav_filenames.extend(row['wav_filename'])

    print('Totally %d wav entries found in csv\n' % count)
    work_todo.join()
    print('\nTotally %d wav file transcripted' % work_done.qsize())

    while not work_done.empty():
        msg = work_done.get()
        losses.append(0.0)
        ground_truths.append(msg['ground_truth'])
        predictions.append(msg['prediction'])
        wavlist.append(msg['wav'])

    wer, cer, samples = calculate_report(wav_filenames, ground_truths, predictions, losses)
    mean_loss = np.mean(losses)

    print('Test - WER: %f, CER: %f, loss: %f' %
          (wer, cer, mean_loss))

    if args.dump:
        with open(args.dump + '.txt', 'w') as ftxt, open(args.dump + '.out', 'w') as fout:
            for wav, txt, out in zip(wavlist, ground_truths, predictions):
                ftxt.write('%s %s\n' % (wav, txt))
                fout.write('%s %s\n' % (wav, out))
            print('Reference texts dumped to %s.txt' % args.dump)
            print('Transcription   dumped to %s.out' % args.dump)
def main():
    parser = argparse.ArgumentParser(description='Computing TFLite accuracy')
    parser.add_argument('--model', required=True,
                        help='Path to the model (protocol buffer binary file)')
    parser.add_argument('--alphabet', required=True,
                        help='Path to the configuration file specifying the alphabet used by the network')
    parser.add_argument('--lm', required=True,
                        help='Path to the language model binary file')
    parser.add_argument('--trie', required=True,
                        help='Path to the language model trie file created with native_client/generate_trie')
    parser.add_argument('--csv', required=True,
                        help='Path to the CSV source file')
    parser.add_argument('--proc', required=False, default=cpu_count(), type=int,
                        help='Number of processes to spawn, defaulting to number of CPUs')
    args = parser.parse_args()

    work_todo = JoinableQueue()   # this is where we are going to store input data
    work_done = Queue()  # this where we are gonna push them out

    processes = []
    for i in range(args.proc):
        worker_process = Process(target=tflite_worker, args=(args.model, args.alphabet, args.lm, args.trie, work_todo, work_done, i), daemon=True, name='tflite_process_{}'.format(i))
        worker_process.start()        # Launch reader() as a separate python process
        processes.append(worker_process)

    print([x.name for x in processes])

    ground_truths = []
    predictions = []
    losses = []

    with open(args.csv, 'r') as csvfile:
        csvreader = csv.DictReader(csvfile)
        for row in csvreader:
            work_todo.put({'filename': row['wav_filename'], 'transcript': row['transcript']})
    work_todo.join()

    while (not work_done.empty()):
        msg = work_done.get()
        losses.append(0.0)
        ground_truths.append(msg['ground_truth'])
        predictions.append(msg['prediction'])

    distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

    wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses)
    mean_loss = np.mean(losses)

    print('Test - WER: %f, CER: %f, loss: %f' %
          (wer, cer, mean_loss))
Example #4
0
        def run_test(init_op, dataset):
            logitses = []
            losses = []
            seq_lengths = []
            ground_truths = []

            bar = create_progressbar(prefix='Computing acoustic model predictions | ',
                                     widgets=['Steps: ', progressbar.Counter(), ' | ', progressbar.Timer()]).start()
            log_progress('Computing acoustic model predictions...')

            step_count = 0

            # Initialize iterator to the appropriate dataset
            session.run(init_op)

            # First pass, compute losses and transposed logits for decoding
            while True:
                try:
                    logits, loss_, lengths, transcripts = session.run([transposed, loss, batch_x_len, batch_y])
                except tf.errors.OutOfRangeError:
                    break

                step_count += 1
                bar.update(step_count)

                logitses.append(logits)
                losses.extend(loss_)
                seq_lengths.append(lengths)
                ground_truths.extend(sparse_tensor_value_to_texts(transcripts, Config.alphabet))

            bar.finish()

            predictions = []

            bar = create_progressbar(max_value=step_count,
                                     prefix='Decoding predictions | ').start()
            log_progress('Decoding predictions...')

            # Second pass, decode logits and compute WER and edit distance metrics
            for logits, seq_length in bar(zip(logitses, seq_lengths)):
                decoded = ctc_beam_search_decoder_batch(logits, seq_length, Config.alphabet, FLAGS.beam_width,
                                                        num_processes=num_processes, scorer=scorer)
                predictions.extend(d[0][1] for d in decoded)

            distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

            wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses)
            mean_loss = np.mean(losses)

            # Take only the first report_count items
            report_samples = itertools.islice(samples, FLAGS.report_count)

            print('Test on %s - WER: %f, CER: %f, loss: %f' %
                  (dataset, wer, cer, mean_loss))
            print('-' * 80)
            for sample in report_samples:
                print('WER: %f, CER: %f, loss: %f' %
                      (sample.wer, sample.distance, sample.loss))
                print(' - src: "%s"' % sample.src)
                print(' - res: "%s"' % sample.res)
                print('-' * 80)

            return samples
Example #5
0
        def run_test(init_op, dataset):
            wav_filenames = []
            losses = []
            predictions = []
            ground_truths = []

            bar = create_progressbar(prefix='Test epoch | ',
                                     widgets=[
                                         'Steps: ',
                                         progressbar.Counter(), ' | ',
                                         progressbar.Timer()
                                     ]).start()
            log_progress('Test epoch...')

            step_count = 0

            # Initialize iterator to the appropriate dataset
            session.run(init_op)

            # First pass, compute losses and transposed logits for decoding
            while True:
                try:
                    batch_wav_filenames, batch_logits, batch_loss, batch_lengths, batch_transcripts = \
                        session.run([batch_wav_filename, transposed, loss, batch_x_len, batch_y])
                except tf.errors.OutOfRangeError:
                    break

                decoded = ctc_beam_search_decoder_batch(
                    batch_logits,
                    batch_lengths,
                    Config.alphabet,
                    FLAGS.beam_width,
                    num_processes=num_processes,
                    scorer=scorer,
                    cutoff_prob=FLAGS.cutoff_prob,
                    cutoff_top_n=FLAGS.cutoff_top_n)
                predictions.extend(d[0][1] for d in decoded)
                ground_truths.extend(
                    sparse_tensor_value_to_texts(batch_transcripts,
                                                 Config.alphabet))
                wav_filenames.extend(
                    wav_filename.decode('UTF-8')
                    for wav_filename in batch_wav_filenames)
                losses.extend(batch_loss)

                step_count += 1
                bar.update(step_count)

            bar.finish()

            wer, cer, samples = calculate_report(wav_filenames, ground_truths,
                                                 predictions, losses)
            mean_loss = np.mean(losses)

            # Take only the first report_count items
            report_samples = itertools.islice(samples, FLAGS.report_count)
            report_samples2 = itertools.islice(samples, FLAGS.report_count2)

            print('Test on %s - WER: %f, CER: %f, loss: %f' %
                  (dataset, wer, cer, mean_loss))
            print('-' * 80)
            for sample in report_samples:
                print('WER: %f, CER: %f, loss: %f' %
                      (sample.wer, sample.cer, sample.loss))
                print(' - wav: file://%s' % sample.wav_filename)
                print(' - src: "%s"' % sample.src)
                print(' - res: "%s"' % sample.res)
                print('-' * 80)

            wer_sum = 0
            avg_wer = 0
            result_csv = os.path.join(FLAGS.result_dir, "result.csv")
            with open(result_csv, 'w', encoding='utf-8') as f:
                writer = mycsv.DictWriter(f,
                                          fieldnames=['wav_filename', 'text'])
                writer.writeheader()
                for sample in report_samples2:
                    #writer.writerow("WER: ",sample.wer, "CER: ", sample.cer, "loss: ", sample.loss)
                    writer.writerow({
                        "wav_filename": sample.wav_filename,
                        "text": sample.src
                    })
                    writer.writerow({
                        "wav_filename": sample.wav_filename,
                        "text": sample.res
                    })
                    #                    writer.writerow("src: ", sample.src)
                    #                    writer.writerow("res: ", sample.res)
                    #                    writer.writerow("-"*80)
                    wer_sum += sample.wer
                avg_wer = wer_sum / FLAGS.report_count2
                writer.writerow({"wav_filename": avg_wer, "text": avg_wer})

            print("*************avg_wer*************", avg_wer)

            return samples
Example #6
0
        def run_test(init_op, dataset):
            wav_filenames = []
            losses = []
            predictions = []
            ground_truths = []

            bar = create_progressbar(prefix='Test epoch | ',
                                     widgets=[
                                         'Steps: ',
                                         progressbar.Counter(), ' | ',
                                         progressbar.Timer()
                                     ]).start()
            log_progress('Test epoch...')

            step_count = 0

            # Initialize iterator to the appropriate dataset
            session.run(init_op)

            # First pass, compute losses and transposed logits for decoding
            while True:
                try:
                    batch_wav_filenames, batch_logits, batch_loss, batch_lengths, batch_transcripts = \
                        session.run([batch_wav_filename, transposed, loss, batch_x_len, batch_y])
                except tf.errors.OutOfRangeError:
                    break

                decoded = ctc_beam_search_decoder_batch(
                    batch_logits,
                    batch_lengths,
                    Config.alphabet,
                    FLAGS.beam_width,
                    num_processes=num_processes,
                    scorer=scorer,
                    cutoff_prob=FLAGS.cutoff_prob,
                    cutoff_top_n=FLAGS.cutoff_top_n)
                predictions.extend(d[0][1] for d in decoded)
                ground_truths.extend(
                    sparse_tensor_value_to_texts(batch_transcripts,
                                                 Config.alphabet))
                wav_filenames.extend(
                    wav_filename.decode('UTF-8')
                    for wav_filename in batch_wav_filenames)
                losses.extend(batch_loss)

                step_count += 1
                bar.update(step_count)

            bar.finish()

            wer, cer, samples = calculate_report(wav_filenames, ground_truths,
                                                 predictions, losses)
            mean_loss = np.mean(losses)

            # Take only the first report_count items
            report_samples = itertools.islice(samples, FLAGS.report_count)

            if verbose:
                print('Test on %s - WER: %f, CER: %f, loss: %f' %
                      (dataset, wer, cer, mean_loss))
            if verbose: print('-' * 80)

            if result_file:
                pruning_type = 'score-based' if not random else 'random'
                result_string = '''Results for evaluating model with pruning percentage of {}% and {} pruning:
                Test on {} - WER: {}, CER: {}, loss: {}
                '''.format(prune_percentage * 100, pruning_type, dataset, wer,
                           cer, mean_loss)
                write_to_file(result_file, result_string, 'a+')

            return wer, cer, mean_loss
def main():
    parser = argparse.ArgumentParser(description='Computing TFLite accuracy')
    parser.add_argument('--model',
                        required=True,
                        help='Path to the model (protocol buffer binary file)')
    parser.add_argument(
        '--alphabet',
        required=True,
        help=
        'Path to the configuration file specifying the alphabet used by the network'
    )
    parser.add_argument('--lm',
                        required=True,
                        help='Path to the language model binary file')
    parser.add_argument(
        '--trie',
        required=True,
        help=
        'Path to the language model trie file created with native_client/generate_trie'
    )
    parser.add_argument('--csv',
                        required=True,
                        help='Path to the CSV source file')
    parser.add_argument(
        '--proc',
        required=False,
        default=cpu_count(),
        type=int,
        help='Number of processes to spawn, defaulting to number of CPUs')
    parser.add_argument(
        '--dump',
        required=False,
        action='store_true',
        default=False,
        help=
        'Dump the results as text file, with one line for each wav: "wav transcription"'
    )
    args = parser.parse_args()

    manager = Manager()
    work_todo = JoinableQueue(
    )  # this is where we are going to store input data
    work_done = manager.Queue()  # this where we are gonna push them out

    processes = []
    for i in range(args.proc):
        worker_process = Process(target=tflite_worker,
                                 args=(args.model, args.alphabet, args.lm,
                                       args.trie, work_todo, work_done, i),
                                 daemon=True,
                                 name='tflite_process_{}'.format(i))
        worker_process.start()  # Launch reader() as a separate python process
        processes.append(worker_process)

    print([x.name for x in processes])

    wavlist = []
    ground_truths = []
    predictions = []
    losses = []

    with open(args.csv, 'r') as csvfile:
        csvreader = csv.DictReader(csvfile)
        count = 0
        for row in csvreader:
            count += 1
            work_todo.put({
                'filename': row['wav_filename'],
                'transcript': row['transcript']
            })
    print('Totally %d wav entries found in csv\n' % count)
    work_todo.join()
    print('\nTotally %d wav file transcripted' % work_done.qsize())

    while not work_done.empty():
        msg = work_done.get()
        losses.append(0.0)
        ground_truths.append(msg['ground_truth'])
        predictions.append(msg['prediction'])
        wavlist.append(msg['wav'])

    wer, cer, _ = calculate_report(ground_truths, predictions, losses)
    mean_loss = np.mean(losses)

    print('Test - WER: %f, CER: %f, loss: %f' % (wer, cer, mean_loss))

    if args.dump:
        with open(args.csv + '.txt',
                  'w') as ftxt, open(args.csv + '.out', 'w') as fout:
            for wav, txt, out in zip(wavlist, ground_truths, predictions):
                ftxt.write('%s %s\n' % (wav, txt))
                fout.write('%s %s\n' % (wav, out))
            print('Reference texts dumped to %s.txt' % args.csv)
            print('Transcription   dumped to %s.out' % args.csv)
Example #8
0
def evaluate(test_csvs, create_model, try_loading):
    scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta, FLAGS.lm_binary_path,
                    FLAGS.lm_trie_path, Config.alphabet)

    test_set = create_dataset(test_csvs,
                              batch_size=FLAGS.test_batch_size,
                              cache_path=FLAGS.test_cached_features_path)
    it = test_set.make_one_shot_iterator()

    (batch_x, batch_x_len), batch_y = it.get_next()

    # One rate per layer
    no_dropout = [None] * 6
    logits, _ = create_model(batch_x=batch_x,
                             seq_length=batch_x_len,
                             dropout=no_dropout)

    # Transpose to batch major and apply softmax for decoder
    transposed = tf.nn.softmax(tf.transpose(logits, [1, 0, 2]))

    loss = tf.nn.ctc_loss(labels=batch_y,
                          inputs=logits,
                          sequence_length=batch_x_len)

    global_step = tf.train.get_or_create_global_step()

    with tf.Session(config=Config.session_config) as session:
        # Create a saver using variables from the above newly created graph
        saver = tf.train.Saver()

        # Restore variables from training checkpoint
        loaded = try_loading(session, saver, 'best_dev_checkpoint',
                             'best validation')
        if not loaded:
            loaded = try_loading(session, saver, 'checkpoint', 'most recent')
        if not loaded:
            log_error(
                'Checkpoint directory ({}) does not contain a valid checkpoint state.'
                .format(FLAGS.checkpoint_dir))
            exit(1)

        logitses = []
        losses = []
        seq_lengths = []
        ground_truths = []

        print('Computing acoustic model predictions...')
        bar = progressbar.ProgressBar(widgets=[
            'Steps: ',
            progressbar.Counter(), ' | ',
            progressbar.Timer()
        ])

        step_count = 0

        # First pass, compute losses and transposed logits for decoding
        while True:
            try:
                logits, loss_, lengths, transcripts = session.run(
                    [transposed, loss, batch_x_len, batch_y])
            except tf.errors.OutOfRangeError:
                break

            step_count += 1
            bar.update(step_count)

            logitses.append(logits)
            losses.extend(loss_)
            seq_lengths.append(lengths)
            ground_truths.extend(
                sparse_tensor_value_to_texts(transcripts, Config.alphabet))

    bar.finish()

    predictions = []

    # Get number of accessible CPU cores for this process
    try:
        num_processes = cpu_count()
    except:
        num_processes = 1

    print('Decoding predictions...')
    bar = progressbar.ProgressBar(max_value=step_count,
                                  widget=progressbar.AdaptiveETA)

    # Second pass, decode logits and compute WER and edit distance metrics
    for logits, seq_length in bar(zip(logitses, seq_lengths)):
        decoded = ctc_beam_search_decoder_batch(logits,
                                                seq_length,
                                                Config.alphabet,
                                                FLAGS.beam_width,
                                                num_processes=num_processes,
                                                scorer=scorer)
        predictions.extend(d[0][1] for d in decoded)

    distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

    wer, cer, samples = calculate_report(ground_truths, predictions, distances,
                                         losses)
    mean_loss = np.mean(losses)

    # Take only the first report_count items
    report_samples = itertools.islice(samples, FLAGS.report_count)

    print('Test - WER: %f, CER: %f, loss: %f' % (wer, cer, mean_loss))
    print('-' * 80)
    for sample in report_samples:
        print('WER: %f, CER: %f, loss: %f' %
              (sample.wer, sample.distance, sample.loss))
        print(' - src: "%s"' % sample.src)
        print(' - res: "%s"' % sample.res)
        print('-' * 80)

    return samples
Example #9
0
def evaluate(test_data, inference_graph):
    scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta, FLAGS.lm_binary_path,
                    FLAGS.lm_trie_path, Config.alphabet)

    def create_windows(features):
        num_strides = len(features) - (Config.n_context * 2)

        # Create a view into the array with overlapping strides of size
        # numcontext (past) + 1 (present) + numcontext (future)
        window_size = 2 * Config.n_context + 1
        features = np.lib.stride_tricks.as_strided(
            features, (num_strides, window_size, Config.n_input),
            (features.strides[0], features.strides[0], features.strides[1]),
            writeable=False)

        return features

    # Create overlapping windows over the features
    test_data['features'] = test_data['features'].apply(create_windows)

    with tf.Session(config=Config.session_config) as session:
        inputs, outputs, layers = inference_graph
        layer_4 = layers['rnn_output']
        layer_5 = layers['layer_5']
        layer_6 = layers['layer_6']
        # Transpose to batch major for decoder
        transposed = tf.transpose(outputs['outputs'], [1, 0, 2])

        labels_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size, None],
                                   name="labels")
        label_lengths_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size],
                                          name="label_lengths")

        # We add 1 to all elements of the transcript to avoid any zero values
        # since we use that as an end-of-sequence token for converting the batch
        # into a SparseTensor. So here we convert the placeholder back into a
        # SparseTensor and subtract ones to get the real labels.
        sparse_labels = tf.contrib.layers.dense_to_sparse(labels_ph)
        neg_ones = tf.SparseTensor(sparse_labels.indices,
                                   -1 * tf.ones_like(sparse_labels.values),
                                   sparse_labels.dense_shape)
        sparse_labels = tf.sparse_add(sparse_labels, neg_ones)

        loss = tf.nn.ctc_loss(labels=sparse_labels,
                              inputs=layers['raw_logits'],
                              sequence_length=inputs['input_lengths'])

        # Create a saver using variables from the above newly created graph
        mapping = {
            v.op.name: v
            for v in tf.global_variables()
            if not v.op.name.startswith('previous_state_')
        }
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error(
                'Checkpoint directory ({}) does not contain a valid checkpoint state.'
                .format(FLAGS.checkpoint_dir))
            exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        logitses = []
        losses = []
        ## To Print the embeddings
        layer_4s = []
        layer_5s = []
        layer_6s = []

        print('Computing acoustic model predictions...')
        batch_count = len(test_data) // FLAGS.test_batch_size
        print('Batch Count: ', batch_count)
        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # First pass, compute losses and transposed logits for decoding
        for batch in bar(split_data(test_data, FLAGS.test_batch_size)):
            session.run(outputs['initialize_state'])
            #TODO: Need to remove it to generalize for greater batch size!
            assert FLAGS.test_batch_size == 1, 'Embedding Extraction will only work for Batch Size = 1 for now!'

            features = pad_to_dense(batch['features'].values)
            features_len = batch['features_len'].values
            labels = pad_to_dense(batch['transcript'].values + 1)
            label_lengths = batch['transcript_len'].values

            logits, loss_, lay4, lay5, lay6 = session.run(
                [transposed, loss, layer_4, layer_5, layer_6],
                feed_dict={
                    inputs['input']: features,
                    inputs['input_lengths']: features_len,
                    labels_ph: labels,
                    label_lengths_ph: label_lengths
                })

            logitses.append(logits)
            losses.extend(loss_)
            layer_4s.append(lay4)
            layer_5s.append(lay5)
            layer_6s.append(lay6)
            print('Saving to Files: ')
            #lay4.tofile('embeddings/lay4.txt')
            #lay5.tofile('embeddings/lay5.txt')
            #lay6.tofile('embeddings/lay6.txt')
            #            np.save('embeddings/lay41.npy', lay4)
            filename = batch.fname.iloc[0]
            save_np_array(lay4, Config.LAYER4 + filename + '.npy')
            save_np_array(lay5, Config.LAYER5 + filename + '.npy')
            save_np_array(lay6, Config.LAYER6 + filename + '.npy')
            #            print('\nLayer 4 Shape: ', load_np_array('embeddings/lay41.npy').shape)
            #            print('\nLayer 4 Shape: ', np.load('embeddings/lay41.npy').shape)
            print('Layer 5 Shape: ', lay5.shape)
            print('Layer 6 Shape: ', lay6.shape)
    print('LAYER4: ', Config.LAYER4)
    ground_truths = []
    predictions = []
    fnames = []

    print('Decoding predictions...')
    bar = progressbar.ProgressBar(max_value=batch_count,
                                  widget=progressbar.AdaptiveETA)

    # Get number of accessible CPU cores for this process
    try:
        num_processes = cpu_count()
    except:
        num_processes = 1

    # Second pass, decode logits and compute WER and edit distance metrics
    for logits, batch in bar(
            zip(logitses, split_data(test_data, FLAGS.test_batch_size))):
        seq_lengths = batch['features_len'].values.astype(np.int32)
        decoded = ctc_beam_search_decoder_batch(logits,
                                                seq_lengths,
                                                Config.alphabet,
                                                FLAGS.beam_width,
                                                num_processes=num_processes,
                                                scorer=scorer)
        #print('Batch\n', batch)
        ground_truths.extend(
            Config.alphabet.decode(l) for l in batch['transcript'])
        fnames.extend([l for l in batch['fname']])
        #fnames.append(batch['fname'])
        #print(fnames)
        predictions.extend(d[0][1] for d in decoded)

    distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

    wer, cer, samples = calculate_report(ground_truths, predictions, distances,
                                         losses, fnames)
    print('Sample Lengths: ', len(samples))
    mean_loss = np.mean(losses)

    # Take only the first report_count items
    report_samples = itertools.islice(samples, FLAGS.report_count)
    print(report_samples)
    print('Test - WER: %f, CER: %f, loss: %f' % (wer, cer, mean_loss))
    print('-' * 80)
    count = 0
    for sample in report_samples:
        count += 1
        with open(Config.TEXT + sample.fname + '.txt', 'w') as f:
            f.write(sample.res)
        print("File Name: ", sample.fname)
        print('WER: %f, CER: %f, loss: %f' %
              (sample.wer, sample.distance, sample.loss))
        print(' - src: "%s"' % sample.src)
        print(' - res: "%s"' % sample.res)
        print('-' * 80)
    print('Total Count: ', count)
    return samples
Example #10
0
        def run_test(init_op, dataset):
            logitses = []
            losses = []
            seq_lengths = []
            ground_truths = []

            bar = create_progressbar(
                prefix='Computing acoustic model predictions | ',
                widgets=[
                    'Steps: ',
                    progressbar.Counter(), ' | ',
                    progressbar.Timer()
                ]).start()
            log_progress('Computing acoustic model predictions...')

            step_count = 0

            # Initialize iterator to the appropriate dataset
            session.run(init_op)

            # First pass, compute losses and transposed logits for decoding
            while True:
                try:
                    logits, loss_, lengths, transcripts = session.run(
                        [transposed, loss, batch_x_len, batch_y])
                except tf.errors.OutOfRangeError:
                    break

                step_count += 1
                bar.update(step_count)

                logitses.append(logits)
                losses.extend(loss_)
                seq_lengths.append(lengths)
                ground_truths.extend(
                    sparse_tensor_value_to_texts(transcripts, Config.alphabet))

            bar.finish()

            predictions = []

            bar = create_progressbar(max_value=step_count,
                                     prefix='Decoding predictions | ').start()
            log_progress('Decoding predictions...')

            # Second pass, decode logits and compute WER and edit distance metrics
            for logits, seq_length in bar(zip(logitses, seq_lengths)):
                decoded = ctc_beam_search_decoder_batch(
                    logits,
                    seq_length,
                    Config.alphabet,
                    FLAGS.beam_width,
                    num_processes=num_processes,
                    scorer=scorer)
                predictions.extend(d[0][1] for d in decoded)

            distances = [
                levenshtein(a, b) for a, b in zip(ground_truths, predictions)
            ]

            wer, cer, samples = calculate_report(ground_truths, predictions,
                                                 distances, losses)
            mean_loss = np.mean(losses)

            # Take only the first report_count items
            report_samples = itertools.islice(samples, FLAGS.report_count)

            print('Test on %s - WER: %f, CER: %f, loss: %f' %
                  (dataset, wer, cer, mean_loss))
            print('-' * 80)
            for sample in report_samples:
                print('WER: %f, CER: %f, loss: %f' %
                      (sample.wer, sample.distance, sample.loss))
                print(' - src: "%s"' % sample.src)
                print(' - res: "%s"' % sample.res)
                print('-' * 80)

            return samples
Example #11
0
def evaluate(test_data, inference_graph):
    scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                    FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                    Config.alphabet)


    def create_windows(features):
        num_strides = len(features) - (Config.n_context * 2)

        # Create a view into the array with overlapping strides of size
        # numcontext (past) + 1 (present) + numcontext (future)
        window_size = 2*Config.n_context+1
        features = np.lib.stride_tricks.as_strided(
            features,
            (num_strides, window_size, Config.n_input),
            (features.strides[0], features.strides[0], features.strides[1]),
            writeable=False)

        return features

    # Create overlapping windows over the features
    test_data['features'] = test_data['features'].apply(create_windows)

    with tf.Session(config=Config.session_config) as session:
        inputs, outputs, layers = inference_graph

        # Transpose to batch major for decoder
        transposed = tf.transpose(outputs['outputs'], [1, 0, 2])

        labels_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size, None], name="labels")
        label_lengths_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size], name="label_lengths")

        # We add 1 to all elements of the transcript to avoid any zero values
        # since we use that as an end-of-sequence token for converting the batch
        # into a SparseTensor. So here we convert the placeholder back into a
        # SparseTensor and subtract ones to get the real labels.
        sparse_labels = tf.contrib.layers.dense_to_sparse(labels_ph)
        neg_ones = tf.SparseTensor(sparse_labels.indices, -1 * tf.ones_like(sparse_labels.values), sparse_labels.dense_shape)
        sparse_labels = tf.sparse_add(sparse_labels, neg_ones)

        loss = tf.nn.ctc_loss(labels=sparse_labels,
                              inputs=layers['raw_logits'],
                              sequence_length=inputs['input_lengths'])

        # Create a saver using variables from the above newly created graph
        mapping = {v.op.name: v for v in tf.global_variables() if not v.op.name.startswith('previous_state_')}
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error('Checkpoint directory ({}) does not contain a valid checkpoint state.'.format(FLAGS.checkpoint_dir))
            exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        logitses = []
        losses = []

        print('Computing acoustic model predictions...')
        batch_count = len(test_data) // FLAGS.test_batch_size
        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # First pass, compute losses and transposed logits for decoding
        for batch in bar(split_data(test_data, FLAGS.test_batch_size)):
            session.run(outputs['initialize_state'])

            features = pad_to_dense(batch['features'].values)
            features_len = batch['features_len'].values
            labels = pad_to_dense(batch['transcript'].values + 1)
            label_lengths = batch['transcript_len'].values

            logits, loss_ = session.run([transposed, loss], feed_dict={
                inputs['input']: features,
                inputs['input_lengths']: features_len,
                labels_ph: labels,
                label_lengths_ph: label_lengths
            })

            logitses.append(logits)
            losses.extend(loss_)

    ground_truths = []
    predictions = []

    print('Decoding predictions...')
    bar = progressbar.ProgressBar(max_value=batch_count,
                                  widget=progressbar.AdaptiveETA)

    # Get number of accessible CPU cores for this process
    try:
        num_processes = cpu_count()
    except:
        num_processes = 1

    # Second pass, decode logits and compute WER and edit distance metrics
    for logits, batch in bar(zip(logitses, split_data(test_data, FLAGS.test_batch_size))):
        seq_lengths = batch['features_len'].values.astype(np.int32)
        decoded = ctc_beam_search_decoder_batch(logits, seq_lengths, Config.alphabet, FLAGS.beam_width,
                                                num_processes=num_processes, scorer=scorer)

        ground_truths.extend(Config.alphabet.decode(l) for l in batch['transcript'])
        predictions.extend(d[0][1] for d in decoded)

    distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

    wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses)
    mean_loss = np.mean(losses)

    # Take only the first report_count items
    report_samples = itertools.islice(samples, FLAGS.report_count)

    print('Test - WER: %f, CER: %f, loss: %f' %
          (wer, cer, mean_loss))
    print('-' * 80)
    for sample in report_samples:
        print('WER: %f, CER: %f, loss: %f' %
              (sample.wer, sample.distance, sample.loss))
        print(' - src: "%s"' % sample.src)
        print(' - res: "%s"' % sample.res)
        print('-' * 80)

    return samples