Python ctc_beam_search_decoder Exemples, ds_ctcdecoder.ctc_beam_search_decoder Python Exemples

Exemple #1

0

Afficher le fichier

def do_single_file_inference(input_file_path):
    """Transcribe a single audio file and print the best beam-search result.

    Builds the inference graph for a batch of one, restores the checkpoint
    reported by ``FLAGS.checkpoint_dir``, evaluates the network on the
    features of ``input_file_path`` and prints the highest-probability
    transcript returned by ``ctc_beam_search_decoder``.

    Args:
        input_file_path: Path of the audio file to transcribe.

    Raises:
        SystemExit: With status 1 when the checkpoint directory does not
            contain a valid checkpoint state.
    """
    # ``sys.exit`` replaces the ``exit`` helper, which is injected by the
    # ``site`` module for interactive sessions and is not guaranteed to exist.
    import sys

    with tfv1.Session(config=Config.session_config) as session:
        inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Create a saver using variables from the above newly created graph
        saver = tfv1.train.Saver()

        # Restore variables from training checkpoint
        # TODO: This restores the most recent checkpoint, but if we use validation to counteract
        #       over-fitting, we may want to restore an earlier checkpoint.
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error(
                'Checkpoint directory ({}) does not contain a valid checkpoint state.'
                .format(FLAGS.checkpoint_dir))
            sys.exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        features, features_len = audiofile_to_features(input_file_path)

        # The recurrent state fed to the graph starts out as all zeros.
        previous_state_c = np.zeros([1, Config.n_cell_dim])
        previous_state_h = np.zeros([1, Config.n_cell_dim])

        # Add batch dimension
        features = tf.expand_dims(features, 0)
        features_len = tf.expand_dims(features_len, 0)

        # Evaluate the feature tensors so they can be fed as concrete values
        features = create_overlapping_windows(features).eval(session=session)
        features_len = features_len.eval(session=session)

        feed_dict = {
            inputs['input']: features,
            inputs['input_lengths']: features_len,
            inputs['previous_state_c']: previous_state_c,
            inputs['previous_state_h']: previous_state_h,
        }
        logits = outputs['outputs'].eval(feed_dict=feed_dict, session=session)

        # Drop the singleton dimensions before handing the matrix to the decoder
        logits = np.squeeze(logits)

        # A language-model scorer is only used when an LM binary is configured
        if FLAGS.lm_binary_path:
            scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                            FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                            Config.alphabet)
        else:
            scorer = None

        decoded = ctc_beam_search_decoder(logits,
                                          Config.alphabet,
                                          FLAGS.beam_width,
                                          scorer=scorer,
                                          cutoff_prob=FLAGS.cutoff_prob,
                                          cutoff_top_n=FLAGS.cutoff_top_n)
        # Print highest probability result
        print(decoded[0][1])

Exemple #2

0

Afficher le fichier

def do_single_file_inference(input_file_path):
    """Transcribe a single audio file and print the best beam-search result.

    Builds the inference graph for a batch of one, prints every returned
    layer, restores the checkpoint reported by ``FLAGS.checkpoint_dir``
    (skipping variables whose op name starts with ``previous_state_``),
    runs the network over overlapping feature windows and prints the
    highest-probability transcript from ``ctc_beam_search_decoder``.

    Args:
        input_file_path: Path of the audio file to transcribe.

    Raises:
        SystemExit: With status 1 when the checkpoint directory does not
            contain a valid checkpoint state.
    """
    # ``sys.exit`` replaces the ``exit`` helper, which is injected by the
    # ``site`` module for interactive sessions and is not guaranteed to exist.
    import sys

    with tf.Session(config=Config.session_config) as session:
        inputs, outputs, layers = create_inference_graph(batch_size=1,
                                                         n_steps=-1)

        # REVIEW josephz: Hack: print all layers here.
        for index, layer in enumerate(layers):
            print("layer '{}': '{}'".format(index, layer))

        # Create a saver using variables from the above newly created graph,
        # leaving out the ``previous_state_*`` variables.
        mapping = {
            v.op.name: v
            for v in tf.global_variables()
            if not v.op.name.startswith('previous_state_')
        }
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        # TODO: This restores the most recent checkpoint, but if we use validation to counteract
        #       over-fitting, we may want to restore an earlier checkpoint.
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error(
                'Checkpoint directory ({}) does not contain a valid checkpoint state.'
                .format(FLAGS.checkpoint_dir))
            sys.exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        # The excluded state variables are set up by this op instead of the saver
        session.run(outputs['initialize_state'])

        features = audiofile_to_input_vector(input_file_path, Config.n_input,
                                             Config.n_context)
        num_strides = len(features) - (Config.n_context * 2)

        # Create a view into the array with overlapping strides of size
        # numcontext (past) + 1 (present) + numcontext (future)
        window_size = 2 * Config.n_context + 1
        features = np.lib.stride_tricks.as_strided(
            features, (num_strides, window_size, Config.n_input),
            (features.strides[0], features.strides[0], features.strides[1]),
            writeable=False)

        # Wrapping in a list adds the batch dimension of one
        logits = session.run(outputs['outputs'],
                             feed_dict={
                                 inputs['input']: [features],
                                 inputs['input_lengths']: [num_strides],
                             })

        logits = np.squeeze(logits)

        scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta, FLAGS.lm_binary_path,
                        FLAGS.lm_trie_path, Config.alphabet)
        decoded = ctc_beam_search_decoder(logits,
                                          Config.alphabet,
                                          FLAGS.beam_width,
                                          scorer=scorer)
        # Print highest probability result
        print(decoded[0][1])

Exemple #3

0

Afficher le fichier

    def classify(self, audio_path):
        """
        Decode one audio file with the 3rd-party CTC beam search.

        Loads the audio through ``self.load_audio``, evaluates a softmax over
        ``self.logits`` in ``self.sess``, stores the scorer that was used on
        ``self.scorer`` and prints both the raw softmax output and the
        highest-probability decoding. Nothing is returned.
        """
        audio_sample = self.load_audio(audio_path)

        # The CTC decoder is fed softmax output rather than raw logits.
        # NOTE(review): a softmax op is created on every call -- confirm this
        # is intended when the method is called repeatedly.
        softmax_op = tf.nn.softmax(self.logits, name='logits')
        probabilities = softmax_op.eval(feed_dict={self.audio: audio_sample},
                                        session=self.sess)
        print('logits:', probabilities)

        probabilities = np.squeeze(probabilities)

        # A scorer is only built when a language-model binary is configured;
        # both LM paths are resolved relative to the 'DeepSpeech' directory.
        lm_scorer = None
        if FLAGS.lm_binary_path:
            binary_path = os.path.join('DeepSpeech', FLAGS.lm_binary_path)
            trie_path = os.path.join('DeepSpeech', FLAGS.lm_trie_path)
            lm_scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta, binary_path,
                               trie_path, Config.alphabet)
        self.scorer = lm_scorer

        beams = ctc_beam_search_decoder(
            probabilities,
            Config.alphabet,
            FLAGS.beam_width,
            scorer=self.scorer,
            cutoff_prob=FLAGS.cutoff_prob,
            cutoff_top_n=FLAGS.cutoff_top_n,
        )

        # Print highest probability result
        print(beams[0][1])

Exemple #4

0

Afficher le fichier

def evaluate(filename='data/test/1_input.npy'):
    """Decode one stored handwriting sample and print the recognised text.

    Loads the points from ``filename``, normalises and featurises them,
    runs the module-level ``logits`` tensor in the module-level ``sess``,
    converts the network output to per-time-step probabilities and prints
    the best ``ctc_beam_search_decoder`` hypothesis mapped through
    ``decodex``.

    Args:
        filename: Path of the ``.npy`` file holding the input points.

    Raises:
        ValueError: If the squeezed network output is not two-dimensional.
    """
    points = np.load(filename)

    # NOTE(review): "filp_h" may be a typo of "flip_h"; it is left untouched
    # because the name is consumed by normalize_trajectory -- confirm there.
    norm_args = [
        "origin", "filp_h", "smooth", "slope", "resample", "slant", "height"
    ]
    feat_args = [
        "x_cor", "y_cor", "penup", "dir", "curv", "vic_aspect", "vic_curl",
        "vic_line", "vic_slope", "bitmap"
    ]

    traj = normalize_trajectory(points, norm_args)
    feat_seq_mat = preprocess_handwriting(traj, feat_args).astype('float32')

    train_input = handwriting_to_input_vector(feat_seq_mat, 20, 9)
    train_input = train_input.astype('float32')

    # A batch containing just this one sample
    data = np.asarray([train_input])

    # Pad input to max_time_step of this batch
    source, source_lengths = pad_sequences(data)
    my_logits = sess.run(logits,
                         feed_dict={
                             input_tensor: source,
                             seq_length: source_lengths
                         })
    my_logits = np.squeeze(my_logits)  # dim0=t, dim1=c
    if my_logits.ndim != 2:
        raise ValueError(
            "expected a (time, classes) matrix, got shape {}".format(
                my_logits.shape))

    # Softmax over the class axis. Subtracting the row maximum first keeps
    # np.exp from overflowing on large logits without changing the result.
    shifted = my_logits - my_logits.max(axis=1, keepdims=True)
    exponentials = np.exp(shifted)
    res = (exponentials / exponentials.sum(axis=1, keepdims=True)).astype(
        np.float64)

    decoded = ctc_beam_search_decoder(res,
                                      alphabet,
                                      beam_width,
                                      scorer=scorer,
                                      cutoff_prob=cutoff_prob,
                                      cutoff_top_n=cutoff_top_n)

    print("Result : " + decodex(decoded[0][1], mapping))

Exemple #5

0

Afficher le fichier

Fichier : model.py Projet : dredwardhyde/speech-recognition-examples

 def beam_search_with_lm(self, xb):
     """Decode a batch with CTC beam search using ``self.scorer``.

     Runs ``self.forward`` on ``xb`` without gradient tracking, applies a
     softmax over dimension 2 and decodes each slice along the first axis
     with a beam size of 25.

     Returns:
         A list holding the top decoding of every slice, in order.
     """
     with torch.no_grad():
         probs = self.forward(xb).softmax(2).cpu().numpy()
         decoded = [
             ctc_beam_search_decoder(probs_seq=probs[idx, :],
                                     alphabet=self.alphabet,
                                     scorer=self.scorer,
                                     beam_size=25)[0][1]
             for idx in range(probs.shape[0])
         ]
     return decoded

Exemple #6

0

Afficher le fichier

Fichier : DeepSpeech.py Projet : bababoss/deepspeech_plus_plus

def do_single_file_inference(input_file_path):
    """Transcribe a single audio file and print the best beam-search result.

    Builds the inference graph for a batch of one, restores the checkpoint
    reported by ``FLAGS.checkpoint_dir`` (skipping variables whose op name
    starts with ``previous_state_``), evaluates the network on the features
    of ``input_file_path`` and prints the highest-probability transcript
    from ``ctc_beam_search_decoder``.

    Args:
        input_file_path: Path of the audio file to transcribe.

    Raises:
        SystemExit: With status 1 when the checkpoint directory does not
            contain a valid checkpoint state.
    """
    # ``sys.exit`` replaces the ``exit`` helper, which is injected by the
    # ``site`` module for interactive sessions and is not guaranteed to exist.
    import sys

    with tf.Session(config=Config.session_config) as session:
        inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Create a saver using variables from the above newly created graph,
        # leaving out the ``previous_state_*`` variables.
        mapping = {
            v.op.name: v
            for v in tf.global_variables()
            if not v.op.name.startswith('previous_state_')
        }
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        # TODO: This restores the most recent checkpoint, but if we use validation to counteract
        #       over-fitting, we may want to restore an earlier checkpoint.
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error(
                'Checkpoint directory ({}) does not contain a valid checkpoint state.'
                .format(FLAGS.checkpoint_dir))
            sys.exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        # The excluded state variables are set up by this op instead of the saver
        session.run(outputs['initialize_state'])

        features, features_len = audiofile_to_features(input_file_path)

        # Add batch dimension
        features = tf.expand_dims(features, 0)
        features_len = tf.expand_dims(features_len, 0)

        # Evaluate the feature tensors so they can be fed as concrete values
        features = create_overlapping_windows(features).eval(session=session)
        features_len = features_len.eval(session=session)

        feed_dict = {
            inputs['input']: features,
            inputs['input_lengths']: features_len,
        }
        logits = outputs['outputs'].eval(feed_dict=feed_dict, session=session)

        logits = np.squeeze(logits)

        scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta, FLAGS.lm_binary_path,
                        FLAGS.lm_trie_path, Config.alphabet)
        decoded = ctc_beam_search_decoder(logits,
                                          Config.alphabet,
                                          FLAGS.beam_width,
                                          scorer=scorer)
        # Print highest probability result
        print(decoded[0][1])

Exemple #7

0

Afficher le fichier

Fichier : DeepSpeech.py Projet : wahyubram82/DeepSpeech

def do_single_file_inference(input_file_path):
    """Run inference on one audio file and print the top transcript.

    The graph is created for a batch of one and loaded through
    ``load_or_init_graph``; ``FLAGS.load == 'auto'`` tries 'best' and then
    'last', any other value is passed on as the only method. The decoder
    uses a scorer only when ``FLAGS.scorer_path`` is set.

    Args:
        input_file_path: Path of the audio file to transcribe.
    """
    with tfv1.Session(config=Config.session_config) as session:
        inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Restore variables from training checkpoint
        method_order = ['best', 'last'] if FLAGS.load == 'auto' else [FLAGS.load]
        load_or_init_graph(session, method_order)

        raw_features, raw_len = audiofile_to_features(input_file_path)

        # The recurrent state fed to the graph starts out as all zeros
        state_shape = [1, Config.n_cell_dim]
        zero_state_c = np.zeros(state_shape)
        zero_state_h = np.zeros(state_shape)

        # Add batch dimension
        batched_features = tf.expand_dims(raw_features, 0)
        batched_len = tf.expand_dims(raw_len, 0)

        # Evaluate the tensors into concrete values for the feed dict
        windows = create_overlapping_windows(batched_features).eval(session=session)
        lengths = batched_len.eval(session=session)

        feed = {
            inputs['input']: windows,
            inputs['input_lengths']: lengths,
            inputs['previous_state_c']: zero_state_c,
            inputs['previous_state_h']: zero_state_h,
        }
        logits = np.squeeze(outputs['outputs'].eval(feed_dict=feed, session=session))

        scorer = None
        if FLAGS.scorer_path:
            scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta, FLAGS.scorer_path,
                            Config.alphabet)

        decoded = ctc_beam_search_decoder(
            logits,
            Config.alphabet,
            FLAGS.beam_width,
            scorer=scorer,
            cutoff_prob=FLAGS.cutoff_prob,
            cutoff_top_n=FLAGS.cutoff_top_n,
        )
        # Print highest probability result
        best_transcript = decoded[0][1]
        print(best_transcript)

Exemple #8

0

Afficher le fichier

Fichier : model.py Projet : dredwardhyde/iam-crnn-ctc-recognition

 def beam_search_with_lm(self, xb):
     """Decode a batch with CTC beam search using ``self.scorer``.

     Runs ``self.forward`` on ``xb`` without gradient tracking, applies a
     softmax over dimension 2, swaps the first two dimensions and decodes
     each slice along the resulting first axis with a beam size of 25.

     Returns:
         A list holding the top decoding of every slice, in order.
     """
     with torch.no_grad():
         # Softmax turns the network output into per-label probabilities;
         # the permute moves dimension 1 to the front before decoding.
         probs = self.forward(xb).softmax(2)
         probs = probs.permute(1, 0, 2).cpu().numpy()
         decoded = [
             ctc_beam_search_decoder(probs_seq=probs[idx, :],
                                     alphabet=self.alphabet,
                                     beam_size=25,
                                     scorer=self.scorer)[0][1]
             for idx in range(probs.shape[0])
         ]
     return decoded

Exemple #9

0

Afficher le fichier

Fichier : DeepSpeech.py Projet : Perpleex/DeepSpeech

def do_single_file_inference(input_file_path):
    """Run inference on one audio file and print the top transcript.

    Depending on ``FLAGS.load`` the most recent checkpoint and/or the best
    validation checkpoint are tried in that order; the process exits with
    status 1 when none could be loaded. The decoder uses a scorer only when
    ``FLAGS.lm_binary_path`` is set.

    Args:
        input_file_path: Path of the audio file to transcribe.
    """
    with tfv1.Session(config=Config.session_config) as session:
        inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Create a saver using variables from the above newly created graph
        saver = tfv1.train.Saver()

        # Restore variables from training checkpoint: each candidate is
        # (accepted FLAGS.load values, checkpoint file name, description).
        candidates = (
            (['auto', 'last'], 'checkpoint', 'most recent'),
            (['auto', 'best'], 'best_dev_checkpoint', 'best validation'),
        )
        loaded = False
        for accepted_modes, checkpoint_name, description in candidates:
            if FLAGS.load not in accepted_modes:
                continue
            loaded = try_loading(session, saver, checkpoint_name, description, load_step=False)
            if loaded:
                break
        if not loaded:
            print('Could not load checkpoint from {}'.format(FLAGS.checkpoint_dir))
            sys.exit(1)

        raw_features, raw_len = audiofile_to_features(input_file_path)

        # The recurrent state fed to the graph starts out as all zeros
        state_shape = [1, Config.n_cell_dim]
        zero_state_c = np.zeros(state_shape)
        zero_state_h = np.zeros(state_shape)

        # Add batch dimension
        batched_features = tf.expand_dims(raw_features, 0)
        batched_len = tf.expand_dims(raw_len, 0)

        # Evaluate the tensors into concrete values for the feed dict
        windows = create_overlapping_windows(batched_features).eval(session=session)
        lengths = batched_len.eval(session=session)

        feed = {
            inputs['input']: windows,
            inputs['input_lengths']: lengths,
            inputs['previous_state_c']: zero_state_c,
            inputs['previous_state_h']: zero_state_h,
        }
        logits = np.squeeze(outputs['outputs'].eval(feed_dict=feed, session=session))

        scorer = None
        if FLAGS.lm_binary_path:
            scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                            FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                            Config.alphabet)

        decoded = ctc_beam_search_decoder(
            logits,
            Config.alphabet,
            FLAGS.beam_width,
            scorer=scorer,
            cutoff_prob=FLAGS.cutoff_prob,
            cutoff_top_n=FLAGS.cutoff_top_n,
        )
        # Print highest probability result
        best_transcript = decoded[0][1]
        print(best_transcript)

Exemple #10

0

Afficher le fichier

Fichier : DeepSpeech.py Projet : gulshan-mittal/DeepSpeech

def do_single_file_inference(input_file_path):
    """Transcribe a single audio file and print the best beam-search result.

    Builds the inference graph for a batch of one, restores the checkpoint
    reported by ``FLAGS.checkpoint_dir`` (skipping variables whose op name
    starts with ``previous_state_``), runs the network over overlapping
    feature windows and prints the highest-probability transcript from
    ``ctc_beam_search_decoder``.

    Args:
        input_file_path: Path of the audio file to transcribe.

    Raises:
        SystemExit: With status 1 when the checkpoint directory does not
            contain a valid checkpoint state.
    """
    # ``sys.exit`` replaces the ``exit`` helper, which is injected by the
    # ``site`` module for interactive sessions and is not guaranteed to exist.
    import sys

    with tf.Session(config=Config.session_config) as session:
        inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Create a saver using variables from the above newly created graph,
        # leaving out the ``previous_state_*`` variables.
        mapping = {v.op.name: v for v in tf.global_variables() if not v.op.name.startswith('previous_state_')}
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        # TODO: This restores the most recent checkpoint, but if we use validation to counteract
        #       over-fitting, we may want to restore an earlier checkpoint.
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error('Checkpoint directory ({}) does not contain a valid checkpoint state.'.format(FLAGS.checkpoint_dir))
            sys.exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        # The excluded state variables are set up by this op instead of the saver
        session.run(outputs['initialize_state'])

        features = audiofile_to_input_vector(input_file_path, Config.n_input, Config.n_context)
        num_strides = len(features) - (Config.n_context * 2)

        # Create a view into the array with overlapping strides of size
        # numcontext (past) + 1 (present) + numcontext (future)
        window_size = 2 * Config.n_context + 1
        features = np.lib.stride_tricks.as_strided(
            features,
            (num_strides, window_size, Config.n_input),
            (features.strides[0], features.strides[0], features.strides[1]),
            writeable=False)

        # Wrapping in a list adds the batch dimension of one
        logits = session.run(outputs['outputs'], feed_dict={
            inputs['input']: [features],
            inputs['input_lengths']: [num_strides],
        })

        logits = np.squeeze(logits)

        scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                        FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                        Config.alphabet)
        decoded = ctc_beam_search_decoder(logits, Config.alphabet, FLAGS.beam_width, scorer=scorer)
        # Print highest probability result
        print(decoded[0][1])

Exemple #11

0

Afficher le fichier

    def generate_lm(
        grouping_key: Tuple[str, str], data_df: pd.DataFrame
    ) -> pd.DataFrame:
        """Build a scorer package for one group and CTC-decode each of its rows.

        A scorer file is built from the transcript at label 0 of ``data_df``
        and written to ``<debug_work_dir>/<identifier>/<text_document_id>.scorer``;
        the intermediate files produced along the way are deleted. Every row's
        ``log_probabilities`` are then exponentiated and decoded with
        ``ds_ctcdecoder.ctc_beam_search_decoder``.

        ``debug_work_dir``, ``kenlm_path`` and ``alphabet_path`` are read from
        the enclosing scope.

        Args:
            grouping_key: ``(identifier, text_document_id)`` of the group; both
                parts are converted with ``str``.
            data_df: Rows of the group, providing the ``transcript`` and
                ``log_probabilities`` columns.

        Returns:
            A DataFrame with a single column named ``"path"`` that holds the
            top decoding of every row.

        Raises:
            AssertionError: If the alphabet does not yield 32 output symbols,
                if a row's probabilities do not sum to 1 (``atol=1e-3``), or if
                initialising the scorer or the alphabet reports a non-zero
                status.
        """
        # The annotation above uses ``str``: the ``np.str`` alias no longer
        # exists in current NumPy and made the ``def`` itself fail.
        identifier, text_document_id = grouping_key
        identifier = str(identifier)
        text_document_id = str(text_document_id)

        transcript = data_df.transcript[0]
        # The transcript is handed to the LM tooling as a temporary text file.
        with tempfile.NamedTemporaryFile("w+t", dir=debug_work_dir) as input_txt:
            input_txt.write(transcript)
            input_txt.flush()
            os.makedirs(os.path.join(debug_work_dir, identifier), exist_ok=True)
            scorer_path = os.path.join(
                debug_work_dir, identifier, text_document_id + ".scorer"
            )
            data_lower, vocab_str = convert_and_filter_topk(
                scorer_path, input_txt.name, 500000
            )
            # NOTE(review): the positional arguments below are passed through
            # unchanged; confirm their meaning against build_lm's signature.
            build_lm(
                scorer_path,
                kenlm_path,
                5,
                "85%",
                "0|0|1",
                True,
                255,
                8,
                "trie",
                data_lower,
                vocab_str,
            )
            # Intermediate artefacts are not needed once the LM is built.
            os.remove(scorer_path + "." + "lower.txt.gz")
            os.remove(scorer_path + "." + "lm.arpa")
            os.remove(scorer_path + "." + "lm_filtered.arpa")

            # NOTE(review): the two float literals are presumably the default
            # alpha/beta weights stored in the package -- confirm.
            create_bundle(
                alphabet_path,
                scorer_path + "." + "lm.binary",
                scorer_path + "." + "vocab-500000.txt",
                scorer_path,
                False,
                0.931289039105002,
                1.1834137581510284,
            )
            os.remove(scorer_path + "." + "lm.binary")
            os.remove(scorer_path + "." + "vocab-500000.txt")

        # One output symbol per alphabet line plus one extra symbol
        # (presumably the CTC blank -- confirm).
        with open(alphabet_path) as fh:
            num_output_symbols = len(fh.readlines()) + 1
        assert num_output_symbols == 32, f"GALVEZ:{num_output_symbols}"

        cutoff_prob = 1.0
        cutoff_top_n = 100
        transcripts = []

        for row in data_df.itertuples():
            log_probabilities = row.log_probabilities.reshape(-1, num_output_symbols)
            probabilities = np.exp(log_probabilities)
            np.testing.assert_allclose(probabilities.sum(axis=1), 1.0, atol=1e-3)

            # The scorer is initialised only to check that the freshly built
            # package loads; decoding below deliberately runs without it.
            scorer = ds_ctcdecoder.Scorer()
            result = scorer.init(
                scorer_path.encode("utf-8"), alphabet_path.encode("utf-8")
            )
            scorer.set_utf8_mode(False)
            assert result == 0, result
            alphabet = ds_ctcdecoder.Alphabet()
            result = alphabet.init(alphabet_path.encode("utf-8"))
            assert not scorer.is_utf8_mode()
            assert result == 0, result
            scorer = None
            outputs = ds_ctcdecoder.ctc_beam_search_decoder(
                probabilities, alphabet, 100, cutoff_prob, cutoff_top_n, scorer
            )
            print(f"GALVEZ:output={outputs[0][1]}")
            # NOTE(review): 30. / 1000. presumably converts frames to seconds
            # at 30 ms per frame -- confirm.
            print(f"GALVEZ:length={probabilities.shape[0] * 30. / 1000.}")
            transcripts.append(outputs[0][1])

        return pd.DataFrame({"path": pd.Series(transcripts)})

Exemple #12

0

Afficher le fichier

# Write one class label per line to the alphabet file. '#' is written as '\#'
# (presumably because a bare '#' has special meaning in that file -- confirm).
# The context manager closes the file even when the write fails.
with open("chars_small.txt", "w", encoding='utf-8') as text_file:
    text_file.write('\n'.join('\\#' if x == '#' else x for x in classes))


def softmax(matrix):
    """Return the row-wise softmax of a two-dimensional array.

    Each row is shifted by its maximum before exponentiation. The shift
    cancels out in the ratio, so the result is unchanged, but ``np.exp`` can
    no longer overflow and produce NaN rows for large inputs.

    Args:
        matrix: Array-like of shape ``(time_steps, classes)``.

    Returns:
        A float64 array of the same shape whose rows each sum to 1.

    Raises:
        ValueError: If ``matrix`` is not two-dimensional.
    """
    matrix = np.asarray(matrix)
    if matrix.ndim != 2:
        raise ValueError(
            "softmax expects a 2-D array, got shape {}".format(matrix.shape))
    if matrix.size == 0:
        # Nothing to normalise; a max-reduction over an empty axis would fail.
        return np.zeros(matrix.shape)

    shifted = matrix - matrix.max(axis=1, keepdims=True)
    exponentials = np.exp(shifted)
    normalised = exponentials / exponentials.sum(axis=1, keepdims=True)
    return normalised.astype(np.float64)


def load_rnn_output(fn):
    """Load a ';'-delimited numeric file and drop its last column."""
    table = np.genfromtxt(fn, delimiter=';')
    without_last_column = table[:, :-1]
    return without_last_column


# Load the alphabet from chars_small.txt, given as an absolute path.
alphabet = Alphabet(os.path.abspath("chars_small.txt"))

# Turn the stored network output into per-time-step probabilities.
crnn_output = softmax(load_rnn_output('./rnn_output.csv'))

# Language-model scorer used by the beam search.
lm_scorer = Scorer(alphabet=alphabet,
                   scorer_path='iam.scorer',
                   alpha=0.75,
                   beta=1.85)

res = ctc_beam_search_decoder(
    probs_seq=crnn_output,
    alphabet=alphabet,
    beam_size=25,
    scorer=lm_scorer,
)
# predicted: the fake friend of the family has to
# actual: the fake friend of the family, like the
print(res[0][1])