# Imports needed by create_bundle.
import shutil
import sys

from ds_ctcdecoder import Scorer, Alphabet as NativeAlphabet
from util.text import Alphabet, UTF8Alphabet


def create_bundle(
    alphabet_path,
    lm_path,
    vocab_path,
    package_path,
    force_utf8,
    default_alpha,
    default_beta,
):
    # Read the vocabulary and guess whether the model is character based:
    # if every "word" is a single character, UTF-8 (bytes output) mode is assumed.
    words = set()
    vocab_looks_char_based = True
    with open(vocab_path) as fin:
        for line in fin:
            for word in line.split():
                words.add(word.encode("utf-8"))
                if len(word) > 1:
                    vocab_looks_char_based = False
    print("{} unique words read from vocabulary file.".format(len(words)))
    print(
        "{} like a character based model.".format(
            "Looks" if vocab_looks_char_based else "Doesn't look"
        )
    )

    if force_utf8 != None:  # pylint: disable=singleton-comparison
        use_utf8 = force_utf8.value
        print("Forcing UTF-8 mode = {}".format(use_utf8))
    else:
        use_utf8 = vocab_looks_char_based

    if use_utf8:
        serialized_alphabet = UTF8Alphabet().serialize()
    else:
        serialized_alphabet = Alphabet(alphabet_path).serialize()

    alphabet = NativeAlphabet()
    err = alphabet.deserialize(serialized_alphabet, len(serialized_alphabet))
    if err != 0:
        print("Error loading alphabet: {}".format(err))
        sys.exit(1)

    # Build the scorer: load the KenLM model, fill the dictionary from the
    # vocabulary, then append the dictionary to a copy of the LM binary.
    scorer = Scorer()
    scorer.set_alphabet(alphabet)
    scorer.set_utf8_mode(use_utf8)
    scorer.reset_params(default_alpha, default_beta)
    scorer.load_lm(lm_path)
    scorer.fill_dictionary(list(words))
    shutil.copy(lm_path, package_path)
    scorer.save_dictionary(package_path, True)  # append, not overwrite
    print("Package created in {}".format(package_path))
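For reference, here is a minimal sketch of calling create_bundle directly, outside of whatever command-line wrapper the script normally provides. The file paths and the alpha/beta values are placeholders, not values taken from this project; passing force_utf8=None lets the character-based heuristic above pick the mode.

# Hypothetical example invocation; paths, default_alpha and default_beta are
# placeholders to be replaced with your own files and tuned values.
create_bundle(
    alphabet_path="data/alphabet.txt",   # ignored when UTF-8 mode is chosen
    lm_path="lm.binary",
    vocab_path="vocab.txt",
    package_path="kenlm.scorer",
    force_utf8=None,                     # None -> use the vocabulary heuristic
    default_alpha=0.93,
    default_beta=1.18,
)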
# Imports assumed by initialize_globals; FLAGS and the helpers live in
# DeepSpeech's util package, and ConfigSingleton is defined in the same module.
import os
import sys

import tensorflow.compat.v1 as tfv1

from attrdict import AttrDict
from xdg import BaseDirectory as xdg

from util.flags import FLAGS
from util.gpu import get_available_gpus
from util.logging import log_error
from util.text import Alphabet, UTF8Alphabet


def initialize_globals():
    c = AttrDict()

    # Set default dropout rates
    if FLAGS.dropout_rate2 < 0:
        FLAGS.dropout_rate2 = FLAGS.dropout_rate
    if FLAGS.dropout_rate3 < 0:
        FLAGS.dropout_rate3 = FLAGS.dropout_rate
    if FLAGS.dropout_rate6 < 0:
        FLAGS.dropout_rate6 = FLAGS.dropout_rate

    # Set default checkpoint dir
    if not FLAGS.checkpoint_dir:
        FLAGS.checkpoint_dir = xdg.save_data_path(
            os.path.join('deepspeech', 'checkpoints'))

    if FLAGS.load not in ['last', 'best', 'init', 'auto', 'transfer']:
        FLAGS.load = 'auto'

    # Set default summary dir
    if not FLAGS.summary_dir:
        FLAGS.summary_dir = xdg.save_data_path(
            os.path.join('deepspeech', 'summaries'))

    # Standard session configuration that'll be used for all new sessions.
    c.session_config = tfv1.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=FLAGS.log_placement,
        inter_op_parallelism_threads=FLAGS.inter_op_parallelism_threads,
        intra_op_parallelism_threads=FLAGS.intra_op_parallelism_threads,
        gpu_options=tfv1.GPUOptions(allow_growth=FLAGS.use_allow_growth))

    # CPU device
    c.cpu_device = '/cpu:0'

    # Available GPU devices
    c.available_devices = get_available_gpus(c.session_config)

    # If there is no GPU available, we fall back to CPU based operation
    if not c.available_devices:
        c.available_devices = [c.cpu_device]

    if FLAGS.utf8:
        c.alphabet = UTF8Alphabet()
    else:
        c.alphabet = Alphabet(os.path.abspath(FLAGS.alphabet_config_path))

    # Geometric Constants
    # ===================
    # For an explanation of the meaning of the geometric constants,
    # please refer to doc/Geometry.md

    # Number of MFCC features
    c.n_input = 26  # TODO: Determine this programmatically from the sample rate

    # The number of frames in the context
    c.n_context = 9  # TODO: Determine the optimal value using a validation data set

    # Number of units in hidden layers
    c.n_hidden = FLAGS.n_hidden
    c.n_hidden_1 = c.n_hidden
    c.n_hidden_2 = c.n_hidden
    c.n_hidden_5 = c.n_hidden

    # LSTM cell state dimension
    c.n_cell_dim = c.n_hidden

    # The number of units in the third layer, which feeds in to the LSTM
    c.n_hidden_3 = c.n_cell_dim

    # Units in the sixth layer = number of characters in the target language plus one
    c.n_hidden_6 = c.alphabet.size() + 1  # +1 for CTC blank label

    # Size of audio window in samples
    if (FLAGS.feature_win_len * FLAGS.audio_sample_rate) % 1000 != 0:
        log_error(
            '--feature_win_len value ({}) in milliseconds ({}) multiplied '
            'by --audio_sample_rate value ({}) must be an integer value. Adjust '
            'your --feature_win_len value or resample your audio accordingly.'
            ''.format(FLAGS.feature_win_len, FLAGS.feature_win_len / 1000,
                      FLAGS.audio_sample_rate))
        sys.exit(1)

    c.audio_window_samples = FLAGS.audio_sample_rate * (FLAGS.feature_win_len / 1000)

    # Stride for feature computations in samples
    if (FLAGS.feature_win_step * FLAGS.audio_sample_rate) % 1000 != 0:
        log_error(
            '--feature_win_step value ({}) in milliseconds ({}) multiplied '
            'by --audio_sample_rate value ({}) must be an integer value. Adjust '
            'your --feature_win_step value or resample your audio accordingly.'
            ''.format(FLAGS.feature_win_step, FLAGS.feature_win_step / 1000,
                      FLAGS.audio_sample_rate))
        sys.exit(1)

    c.audio_step_samples = FLAGS.audio_sample_rate * (FLAGS.feature_win_step / 1000)

    if FLAGS.one_shot_infer:
        if not os.path.exists(FLAGS.one_shot_infer):
            log_error('Path specified in --one_shot_infer is not a valid file.')
            sys.exit(1)

    ConfigSingleton._config = c  # pylint: disable=protected-access
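As a worked example of the window/stride arithmetic above, here is a sketch assuming the common defaults of a 16 kHz sample rate, a 32 ms feature window and a 20 ms step (assumed values, not read from FLAGS):

# Assumed values: 16 kHz audio, 32 ms window, 20 ms step.
audio_sample_rate = 16000
feature_win_len = 32    # window length in milliseconds
feature_win_step = 20   # window stride in milliseconds

# Both products are divisible by 1000, so the checks above would pass.
assert (feature_win_len * audio_sample_rate) % 1000 == 0
assert (feature_win_step * audio_sample_rate) % 1000 == 0

audio_window_samples = audio_sample_rate * (feature_win_len / 1000)  # 512.0 samples
audio_step_samples = audio_sample_rate * (feature_win_step / 1000)   # 320.0 samples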
import os

from ds_ctcdecoder import ctc_beam_search_decoder_batch, Scorer
from util.text import Alphabet, UTF8Alphabet

# Build a UTF-8 (bytes output) alphabet and load the packaged Tamil scorer with
# alpha=0.75 (language model weight) and beta=1.85 (word insertion weight).
alphabet = UTF8Alphabet()
scorer = Scorer(0.75, 1.85,
                "/content/DeepSpeech-Indo/data/lm/kenlm_tamil.scorer",
                alphabet)
print(scorer)
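To see how the loaded scorer plugs into decoding, the sketch below passes it to batch beam-search decoding in the same way DeepSpeech's evaluation code does. The input array is a random stand-in for real acoustic-model softmax output, and the beam width and shapes are illustrative only.

import numpy as np

# Stand-in for real softmax output: shape [batch, time, alphabet.size() + 1].
dummy_probs = np.random.uniform(size=(1, 100, alphabet.size() + 1)).astype(np.float32)
dummy_lengths = np.array([100], dtype=np.int32)

decoded = ctc_beam_search_decoder_batch(
    dummy_probs, dummy_lengths, alphabet, 500,  # 500 = beam width (illustrative)
    num_processes=1, scorer=scorer)
print(decoded[0][0][1])  # best transcript for the first (and only) batch item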