Python SpeechRecognizer Examples

Programming Language: Python

Namespace/Package Name: lvsr.bricks.recognizer

Class/Type: SpeechRecognizer

Examples at hotexamples.com: 9

Python SpeechRecognizer - 9 examples found. These are the top rated real world Python examples of lvsr.bricks.recognizer.SpeechRecognizer extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

SpeechRecognizer(4)

load_params(3)

initialize(2)

analyze(1)

beam_search(1)

get_cost_graph(1)

init_beam_search(1)

sample(1)

Example #1

Show file

def sample(config, params, load_path, part):
    data = Data(**config['data'])

    recognizer = SpeechRecognizer(data.recordings_source,
                                  data.labels_source,
                                  data.eos_label,
                                  data.num_features,
                                  data.num_labels,
                                  character_map=data.character_map,
                                  name='recognizer',
                                  **config["net"])
    recognizer.load_params(load_path)

    dataset = data.get_dataset(part, add_sources=(data.uttid_source, ))
    stream = data.get_stream(part,
                             batches=False,
                             shuffle=False,
                             add_sources=(data.uttid_source, ))
    it = stream.get_epoch_iterator()

    print_to = sys.stdout
    for number, data in enumerate(it):
        print("Utterance {} ({})".format(number, data[2]), file=print_to)
        groundtruth_text = dataset.pretty_print(data[1])
        print("Groundtruth:", groundtruth_text, file=print_to)
        sample = recognizer.sample(data[0])['outputs'][:, 0]
        recognized_text = dataset.pretty_print(sample)
        print("Recognized:", recognized_text, file=print_to)

Example #2

Show file

def create_model(config, data, test_tag):

    # Build the main brick and initialize all parameters.
    recognizer = SpeechRecognizer(data.recordings_source,
                                  data.labels_source,
                                  data.eos_label,
                                  data.num_features,
                                  data.num_labels,
                                  name="recognizer",
                                  data_prepend_eos=data.prepend_eos,
                                  character_map=data.character_map,
                                  **config["net"])
    for brick_path, attribute_dict in sorted(config['initialization'].items(),
                                             key=lambda (k, v): k.count('/')):
        for attribute, value in attribute_dict.items():
            brick, = Selector(recognizer).select(brick_path).bricks
            setattr(brick, attribute, value)
            brick.push_initialization_config()
    recognizer.initialize()

    if test_tag:
        tensor.TensorVariable.__str__ = tensor.TensorVariable.__repr__
        __stream = data.get_stream("train")
        __data = next(__stream.get_epoch_iterator(as_dict=True))
        recognizer.recordings.tag.test_value = __data[data.recordings_source]
        recognizer.recordings_mask.tag.test_value = __data[
            data.recordings_source + '_mask']
        recognizer.labels.tag.test_value = __data[data.labels_source]
        recognizer.labels_mask.tag.test_value = __data[data.labels_source +
                                                       '_mask']
        theano.config.compute_test_value = 'warn'
    return recognizer

Example #3

Show file

File: main.py Project: ixtel/attention-lvcsr

def create_model(config, data, test_tag):

    # Build the main brick and initialize all parameters.
    recognizer = SpeechRecognizer(
        data.recordings_source, data.labels_source,
        data.eos_label,
        data.num_features, data.num_labels,
        name="recognizer",
        data_prepend_eos=data.prepend_eos,
        character_map=data.character_map,
        **config["net"])
    for brick_path, attribute_dict in sorted(
            config['initialization'].items(),
            key=lambda (k, v): k.count('/')):
        for attribute, value in attribute_dict.items():
            brick, = Selector(recognizer).select(brick_path).bricks
            setattr(brick, attribute, value)
            brick.push_initialization_config()
    recognizer.initialize()

    if test_tag:
        tensor.TensorVariable.__str__ = tensor.TensorVariable.__repr__
        __stream = data.get_stream("train")
        __data = next(__stream.get_epoch_iterator(as_dict=True))
        recognizer.recordings.tag.test_value = __data[data.recordings_source]
        recognizer.recordings_mask.tag.test_value = __data[data.recordings_source + '_mask']
        recognizer.labels.tag.test_value = __data[data.labels_source]
        recognizer.labels_mask.tag.test_value = __data[data.labels_source + '_mask']
        theano.config.compute_test_value = 'warn'
    return recognizer

Example #4

Show file

File: main.py Project: ixtel/attention-lvcsr

def sample(config, params, load_path, part):
    data = Data(**config['data'])

    recognizer = SpeechRecognizer(
        data.recordings_source, data.labels_source,
        data.eos_label, data.num_features, data.num_labels,
        character_map=data.character_map,
        name='recognizer', **config["net"])
    recognizer.load_params(load_path)

    dataset = data.get_dataset(part, add_sources=(data.uttid_source,))
    stream = data.get_stream(part, batches=False, shuffle=False,
                             add_sources=(data.uttid_source,))
    it = stream.get_epoch_iterator()

    print_to = sys.stdout
    for number, data in enumerate(it):
        print("Utterance {} ({})".format(number, data[2]), file=print_to)
        groundtruth_text = dataset.pretty_print(data[1])
        print("Groundtruth:", groundtruth_text, file=print_to)
        sample = recognizer.sample(data[0])['outputs'][:, 0]
        recognized_text = dataset.pretty_print(sample)
        print("Recognized:", recognized_text, file=print_to)

Example #5

Show file

File: main.py Project: dmitriy-serdyuk/twinnet-asr

def create_model(config, data, load_path=None, test_tag=False):
    """
    Build the main brick and initialize or load all parameters.

    Parameters
    ----------

    config : dict
        the configuration dict

    data : object of class Data
        the dataset creation object

    load_path : str or None
        if given a string, it will be used to load model parameters. Else,
        the parameters will be randomly initalized by calling
        recognizer.initialize()

    test_tag : bool
        if true, will add tag the input variables with test values

    """
    # First tell the recognizer about required data sources
    net_config = dict(config["net"])
    bottom_class = net_config['bottom']['bottom_class']
    input_dims = {
        source: data.num_features(source)
        for source in bottom_class.vector_input_sources
    }
    input_num_chars = {
        source: len(data.character_map(source))
        for source in bottom_class.discrete_input_sources
    }

    recognizer = SpeechRecognizer(input_dims=input_dims,
                                  input_num_chars=input_num_chars,
                                  eos_label=data.eos_label,
                                  num_phonemes=data.num_labels,
                                  name="recognizer",
                                  data_prepend_eos=data.prepend_eos,
                                  character_map=data.character_map('labels'),
                                  **net_config)
    if load_path:
        recognizer.load_params(load_path)
    else:
        for brick_path, attribute_dict in sorted(
                config['initialization'].items(), key=lambda
            (k, v): k.count('/')):
            for attribute, value in attribute_dict.items():
                brick, = Selector(recognizer).select(brick_path).bricks
                setattr(brick, attribute, value)
                brick.push_initialization_config()
        recognizer.initialize()

    if test_tag:
        # fails with newest theano
        # tensor.TensorVariable.__str__ = tensor.TensorVariable.__repr__
        __stream = data.get_stream("train")
        __data = next(__stream.get_epoch_iterator(as_dict=True))
        for __var in recognizer.inputs.values():
            __var.tag.test_value = __data[__var.name]
        theano.config.compute_test_value = 'warn'
    return recognizer

Example #6

Show file

File: main.py Project: ZhangAustin/attention-lvcsr

def search(config, params, load_path, beam_size, part, decode_only, report,
           decoded_save, nll_only, char_discount):
    from matplotlib import pyplot
    from lvsr.notebook import show_alignment

    data = Data(**config['data'])

    recognizer = SpeechRecognizer(
        data.recordings_source, data.labels_source,
        data.eos_label, data.num_features, data.num_labels,
        character_map=data.character_map,
        name='recognizer', **config["net"])
    recognizer.load_params(load_path)
    recognizer.init_beam_search(beam_size)

    dataset = data.get_dataset(part, add_sources=(data.uttid_source,))
    stream = data.get_stream(part, batches=False, shuffle=False,
                                add_sources=(data.uttid_source,))
    it = stream.get_epoch_iterator()
    if decode_only is not None:
        decode_only = eval(decode_only)

    weights = tensor.matrix('weights')
    weight_statistics = theano.function(
        [weights],
        [weights_std(weights.dimshuffle(0, 'x', 1)),
            monotonicity_penalty(weights.dimshuffle(0, 'x', 1))])

    print_to = sys.stdout
    if report:
        alignments_path = os.path.join(report, "alignments")
        if not os.path.exists(report):
            os.mkdir(report)
            os.mkdir(alignments_path)
        print_to = open(os.path.join(report, "report.txt"), 'w')

    decoded_file = None
    if decoded_save:
        decoded_file = open(decoded_save, 'w')

    num_examples = .0
    total_nll = .0
    total_errors = .0
    total_length = .0
    total_wer_errors = .0
    total_word_length = 0.
    with open(os.path.expandvars(config['vocabulary'])) as f:
        vocabulary = dict(line.split() for line in f.readlines())

    def to_words(chars):
        words = chars.split()
        words = [vocabulary[word] if word in vocabulary
                    else vocabulary['<UNK>'] for word in words]
        return words

    for number, data in enumerate(it):
        if decode_only and number not in decode_only:
            continue
        print("Utterance {} ({})".format(number, data[2]), file=print_to)
        groundtruth = dataset.decode(data[1])
        groundtruth_text = dataset.pretty_print(data[1])
        costs_groundtruth, weights_groundtruth = (
            recognizer.analyze(data[0], data[1])[:2])
        weight_std_groundtruth, mono_penalty_groundtruth = weight_statistics(
            weights_groundtruth)
        total_nll += costs_groundtruth.sum()
        num_examples += 1
        print("Groundtruth:", groundtruth_text, file=print_to)
        print("Groundtruth cost:", costs_groundtruth.sum(), file=print_to)
        print("Groundtruth weight std:", weight_std_groundtruth, file=print_to)
        print("Groundtruth monotonicity penalty:", mono_penalty_groundtruth, file=print_to)
        print("Average groundtruth cost: {}".format(total_nll / num_examples),
                file=print_to)
        if nll_only:
            continue

        before = time.time()
        outputs, search_costs = recognizer.beam_search(
            data[0], char_discount=char_discount)
        took = time.time() - before
        recognized = dataset.decode(outputs[0])
        recognized_text = dataset.pretty_print(outputs[0])
        costs_recognized, weights_recognized = (
            recognizer.analyze(data[0], outputs[0])[:2])
        weight_std_recognized, mono_penalty_recognized = weight_statistics(
            weights_recognized)
        error = min(1, wer(groundtruth, recognized))
        total_errors += len(groundtruth) * error
        total_length += len(groundtruth)

        wer_error = min(1, wer(to_words(groundtruth_text),
                                to_words(recognized_text)))
        total_wer_errors += len(groundtruth) * wer_error
        total_word_length += len(groundtruth)

        if report and recognized:
            show_alignment(weights_groundtruth, groundtruth, bos_symbol=True)
            pyplot.savefig(os.path.join(
                alignments_path, "{}.groundtruth.png".format(number)))
            show_alignment(weights_recognized, recognized, bos_symbol=True)
            pyplot.savefig(os.path.join(
                alignments_path, "{}.recognized.png".format(number)))

        if decoded_file is not None:
            print("{} {}".format(data[2], ' '.join(recognized)), file=decoded_file)

        print("Decoding took:", took, file=print_to)
        print("Beam search cost:", search_costs[0], file=print_to)
        print("Recognized:", recognized_text, file=print_to)
        print("Recognized cost:", costs_recognized.sum(), file=print_to)
        print("Recognized weight std:", weight_std_recognized, file=print_to)
        print("Recognized monotonicity penalty:", mono_penalty_recognized, file=print_to)
        print("CER:", error, file=print_to)
        print("Average CER:", total_errors / total_length, file=print_to)
        print("WER:", wer_error, file=print_to)
        print("Average WER:", total_wer_errors / total_word_length, file=print_to)

Example #7

Show file

File: main.py Project: ZhangAustin/attention-lvcsr

def train(config, save_path, bokeh_name,
          params, bokeh_server, test_tag, use_load_ext,
          load_log, fast_start, validation_epochs, validation_batches,
          per_epochs, per_batches):
    root_path, extension = os.path.splitext(save_path)

    data = Data(**config['data'])

    # Build the main brick and initialize all parameters.
    recognizer = SpeechRecognizer(
        data.recordings_source, data.labels_source,
        data.eos_label,
        data.num_features, data.num_labels,
        name="recognizer",
        data_prepend_eos=data.prepend_eos,
        character_map=data.character_map,
        **config["net"])
    for brick_path, attribute_dict in sorted(
            config['initialization'].items(),
            key=lambda (k, v): -k.count('/')):
        for attribute, value in attribute_dict.items():
            brick, = Selector(recognizer).select(brick_path).bricks
            setattr(brick, attribute, value)
            brick.push_initialization_config()
    recognizer.initialize()

    # Separate attention_params to be handled differently
    # when regularization is applied
    attention = recognizer.generator.transition.attention
    attention_params = Selector(attention).get_parameters().values()

    logger.info(
        "Initialization schemes for all bricks.\n"
        "Works well only in my branch with __repr__ added to all them,\n"
        "there is an issue #463 in Blocks to do that properly.")

    def show_init_scheme(cur):
        result = dict()
        for attr in dir(cur):
            if attr.endswith('_init'):
                result[attr] = getattr(cur, attr)
        for child in cur.children:
            result[child.name] = show_init_scheme(child)
        return result
    logger.info(pprint.pformat(show_init_scheme(recognizer)))

    if params:
        logger.info("Load parameters from " + params)
        recognizer.load_params(params)

    if test_tag:
        tensor.TensorVariable.__str__ = tensor.TensorVariable.__repr__
        __stream = data.get_stream("train")
        __data = next(__stream.get_epoch_iterator(as_dict=True))
        recognizer.recordings.tag.test_value = __data[data.recordings_source]
        recognizer.recordings_mask.tag.test_value = __data[data.recordings_source + '_mask']
        recognizer.labels.tag.test_value = __data[data.labels_source]
        recognizer.labels_mask.tag.test_value = __data[data.labels_source + '_mask']
        theano.config.compute_test_value = 'warn'

    batch_cost = recognizer.get_cost_graph().sum()
    batch_size = named_copy(recognizer.recordings.shape[1], "batch_size")
    # Assumes constant batch size. `aggregation.mean` is not used because
    # of Blocks #514.
    cost = batch_cost / batch_size
    cost.name = "sequence_log_likelihood"
    logger.info("Cost graph is built")

    # Fetch variables useful for debugging.
    # It is important not to use any aggregation schemes here,
    # as it's currently impossible to spread the effect of
    # regularization on their variables, see Blocks #514.
    cost_cg = ComputationGraph(cost)
    r = recognizer
    energies, = VariableFilter(
        applications=[r.generator.readout.readout], name="output_0")(
                cost_cg)
    bottom_output, = VariableFilter(
        applications=[r.bottom.apply], name="output")(
                cost_cg)
    attended, = VariableFilter(
        applications=[r.generator.transition.apply], name="attended")(
                cost_cg)
    attended_mask, = VariableFilter(
        applications=[r.generator.transition.apply], name="attended_mask")(
                cost_cg)
    weights, = VariableFilter(
        applications=[r.generator.evaluate], name="weights")(
                cost_cg)
    max_recording_length = named_copy(r.recordings.shape[0],
                                      "max_recording_length")
    # To exclude subsampling related bugs
    max_attended_mask_length = named_copy(attended_mask.shape[0],
                                          "max_attended_mask_length")
    max_attended_length = named_copy(attended.shape[0],
                                     "max_attended_length")
    max_num_phonemes = named_copy(r.labels.shape[0],
                                  "max_num_phonemes")
    min_energy = named_copy(energies.min(), "min_energy")
    max_energy = named_copy(energies.max(), "max_energy")
    mean_attended = named_copy(abs(attended).mean(),
                               "mean_attended")
    mean_bottom_output = named_copy(abs(bottom_output).mean(),
                                    "mean_bottom_output")
    weights_penalty = named_copy(monotonicity_penalty(weights, r.labels_mask),
                                 "weights_penalty")
    weights_entropy = named_copy(entropy(weights, r.labels_mask),
                                 "weights_entropy")
    mask_density = named_copy(r.labels_mask.mean(),
                              "mask_density")
    cg = ComputationGraph([
        cost, weights_penalty, weights_entropy,
        min_energy, max_energy,
        mean_attended, mean_bottom_output,
        batch_size, max_num_phonemes,
        mask_density])

    # Regularization. It is applied explicitly to all variables
    # of interest, it could not be applied to the cost only as it
    # would not have effect on auxiliary variables, see Blocks #514.
    reg_config = config['regularization']
    regularized_cg = cg
    if reg_config.get('dropout'):
        logger.info('apply dropout')
        regularized_cg = apply_dropout(cg, [bottom_output], 0.5)
    if reg_config.get('noise'):
        logger.info('apply noise')
        noise_subjects = [p for p in cg.parameters if p not in attention_params]
        regularized_cg = apply_noise(cg, noise_subjects, reg_config['noise'])
    regularized_cost = regularized_cg.outputs[0]
    regularized_weights_penalty = regularized_cg.outputs[1]

    # Model is weird class, we spend lots of time arguing with Bart
    # what it should be. However it can already nice things, e.g.
    # one extract all the parameters from the computation graphs
    # and give them hierahical names. This help to notice when a
    # because of some bug a parameter is not in the computation
    # graph.
    model = SpeechModel(regularized_cost)
    params = model.get_parameter_dict()
    logger.info("Parameters:\n" +
                pprint.pformat(
                    [(key, params[key].get_value().shape) for key
                        in sorted(params.keys())],
                    width=120))

    # Define the training algorithm.
    train_conf = config['training']
    clipping = StepClipping(train_conf['gradient_threshold'])
    clipping.threshold.name = "gradient_norm_threshold"
    rule_names = train_conf.get('rules', ['momentum'])
    core_rules = []
    if 'momentum' in rule_names:
        logger.info("Using scaling and momentum for training")
        core_rules.append(Momentum(train_conf['scale'], train_conf['momentum']))
    if 'adadelta' in rule_names:
        logger.info("Using AdaDelta for training")
        core_rules.append(AdaDelta(train_conf['decay_rate'], train_conf['epsilon']))
    max_norm_rules = []
    if reg_config.get('max_norm', False):
        logger.info("Apply MaxNorm")
        maxnorm_subjects = VariableFilter(roles=[WEIGHT])(cg.parameters)
        if reg_config.get('max_norm_exclude_lookup', False):
            maxnorm_subjects = [v for v in maxnorm_subjects
                                if not isinstance(get_brick(v), LookupTable)]
        logger.info("Parameters covered by MaxNorm:\n"
                    + pprint.pformat([name for name, p in params.items()
                                        if p in maxnorm_subjects]))
        logger.info("Parameters NOT covered by MaxNorm:\n"
                    + pprint.pformat([name for name, p in params.items()
                                        if not p in maxnorm_subjects]))
        max_norm_rules = [
            Restrict(VariableClipping(reg_config['max_norm'], axis=0),
                        maxnorm_subjects)]
    algorithm = GradientDescent(
        cost=regularized_cost +
            reg_config.get("penalty_coof", .0) * regularized_weights_penalty / batch_size +
            reg_config.get("decay", .0) *
            l2_norm(VariableFilter(roles=[WEIGHT])(cg.parameters)) ** 2,
        parameters=params.values(),
        step_rule=CompositeRule(
            [clipping] + core_rules + max_norm_rules +
            # Parameters are not changed at all
            # when nans are encountered.
            [RemoveNotFinite(0.0)]))

    # More variables for debugging: some of them can be added only
    # after the `algorithm` object is created.
    observables = regularized_cg.outputs
    observables += [
        algorithm.total_step_norm, algorithm.total_gradient_norm,
        clipping.threshold]
    for name, param in params.items():
        num_elements = numpy.product(param.get_value().shape)
        norm = param.norm(2) / num_elements ** 0.5
        grad_norm = algorithm.gradients[param].norm(2) / num_elements ** 0.5
        step_norm = algorithm.steps[param].norm(2) / num_elements ** 0.5
        stats = tensor.stack(norm, grad_norm, step_norm, step_norm / grad_norm)
        stats.name = name + '_stats'
        observables.append(stats)

    def attach_aggregation_schemes(variables):
        # Aggregation specification has to be factored out as a separate
        # function as it has to be applied at the very last stage
        # separately to training and validation observables.
        result = []
        for var in variables:
            if var.name == 'weights_penalty':
                result.append(named_copy(aggregation.mean(var, batch_size),
                                            'weights_penalty_per_recording'))
            elif var.name == 'weights_entropy':
                result.append(named_copy(aggregation.mean(
                    var, recognizer.labels_mask.sum()), 'weights_entropy_per_label'))
            else:
                result.append(var)
        return result

    # Build main loop.
    logger.info("Initialize extensions")
    extensions = []
    if use_load_ext and params:
        extensions.append(Load(params, load_iteration_state=True, load_log=True))
    if load_log and params:
        extensions.append(LoadLog(params))
    extensions += [
        Timing(after_batch=True),
        CGStatistics(),
        #CodeVersion(['lvsr']),
        ]
    extensions.append(TrainingDataMonitoring(
        [observables[0], algorithm.total_gradient_norm,
            algorithm.total_step_norm, clipping.threshold,
            max_recording_length,
            max_attended_length, max_attended_mask_length], after_batch=True))
    average_monitoring = TrainingDataMonitoring(
        attach_aggregation_schemes(observables),
        prefix="average", every_n_batches=10)
    extensions.append(average_monitoring)
    validation = DataStreamMonitoring(
        attach_aggregation_schemes([cost, weights_entropy, weights_penalty]),
        data.get_stream("valid"), prefix="valid").set_conditions(
            before_first_epoch=not fast_start,
            every_n_epochs=validation_epochs,
            every_n_batches=validation_batches,
            after_training=False)
    extensions.append(validation)
    recognizer.init_beam_search(10)
    per = PhonemeErrorRate(recognizer, data.get_dataset("valid"))
    per_monitoring = DataStreamMonitoring(
        [per], data.get_stream("valid", batches=False, shuffle=False),
        prefix="valid").set_conditions(
            before_first_epoch=not fast_start,
            every_n_epochs=per_epochs,
            every_n_batches=per_batches,
            after_training=False)
    extensions.append(per_monitoring)
    track_the_best_per = TrackTheBest(
        per_monitoring.record_name(per)).set_conditions(
            before_first_epoch=True, after_epoch=True)
    track_the_best_likelihood = TrackTheBest(
        validation.record_name(cost)).set_conditions(
            before_first_epoch=True, after_epoch=True)
    extensions += [track_the_best_likelihood, track_the_best_per]
    extensions.append(AdaptiveClipping(
        algorithm.total_gradient_norm.name,
        clipping, train_conf['gradient_threshold'],
        decay_rate=0.998, burnin_period=500))
    extensions += [
        SwitchOffLengthFilter(data.length_filter,
            after_n_batches=train_conf.get('stop_filtering')),
        FinishAfter(after_n_batches=train_conf['num_batches'],
                    after_n_epochs=train_conf['num_epochs'])
        .add_condition(["after_batch"], _gradient_norm_is_none),
        # Live plotting: requires launching `bokeh-server`
        # and allows to see what happens online.
        Plot(bokeh_name
             if bokeh_name
             else os.path.basename(save_path),
             [# Plot 1: training and validation costs
             [average_monitoring.record_name(regularized_cost),
             validation.record_name(cost)],
             # Plot 2: gradient norm,
             [average_monitoring.record_name(algorithm.total_gradient_norm),
             average_monitoring.record_name(clipping.threshold)],
             # Plot 3: phoneme error rate
             [per_monitoring.record_name(per)],
             # Plot 4: training and validation mean weight entropy
             [average_monitoring._record_name('weights_entropy_per_label'),
             validation._record_name('weights_entropy_per_label')],
             # Plot 5: training and validation monotonicity penalty
             [average_monitoring._record_name('weights_penalty_per_recording'),
             validation._record_name('weights_penalty_per_recording')]],
             every_n_batches=10,
             server_url=bokeh_server),
        Checkpoint(save_path,
                   before_first_epoch=not fast_start, after_epoch=True,
                   every_n_batches=train_conf.get('save_every_n_batches'),
                   save_separately=["model", "log"],
                   use_cpickle=True)
        .add_condition(
            ['after_epoch'],
            OnLogRecord(track_the_best_per.notification_name),
            (root_path + "_best" + extension,))
        .add_condition(
            ['after_epoch'],
            OnLogRecord(track_the_best_likelihood.notification_name),
            (root_path + "_best_ll" + extension,)),
        ProgressBar(),
        Printing(every_n_batches=1,
                    attribute_filter=PrintingFilterList()
                    )]

    # Save the config into the status
    log = TrainingLog()
    log.status['_config'] = repr(config)
    main_loop = MainLoop(
        model=model, log=log, algorithm=algorithm,
        data_stream=data.get_stream("train"),
        extensions=extensions)
    main_loop.run()

Example #8

Show file

def search(config, params, load_path, part, decode_only, report, decoded_save,
           nll_only, seed):
    import matplotlib
    matplotlib.use("Agg")
    from matplotlib import pyplot
    from lvsr.notebook import show_alignment

    data = Data(**config['data'])
    search_conf = config['monitoring']['search']

    logger.info("Recognizer initialization started")
    recognizer = SpeechRecognizer(data.recordings_source,
                                  data.labels_source,
                                  data.eos_label,
                                  data.num_features,
                                  data.num_labels,
                                  character_map=data.character_map,
                                  name='recognizer',
                                  **config["net"])
    recognizer.load_params(load_path)
    recognizer.init_beam_search(search_conf['beam_size'])
    logger.info("Recognizer is initialized")

    stream = data.get_stream(part,
                             batches=False,
                             shuffle=part == 'train',
                             add_sources=(data.uttid_source, ),
                             num_examples=500 if part == 'train' else None,
                             seed=seed)
    it = stream.get_epoch_iterator()
    if decode_only is not None:
        decode_only = eval(decode_only)

    weights = tensor.matrix('weights')
    weight_statistics = theano.function([weights], [
        weights_std(weights.dimshuffle(0, 'x', 1)),
        monotonicity_penalty(weights.dimshuffle(0, 'x', 1))
    ])

    print_to = sys.stdout
    if report:
        alignments_path = os.path.join(report, "alignments")
        if not os.path.exists(report):
            os.mkdir(report)
            os.mkdir(alignments_path)
        print_to = open(os.path.join(report, "report.txt"), 'w')

    decoded_file = None
    if decoded_save:
        decoded_file = open(decoded_save, 'w')

    num_examples = .0
    total_nll = .0
    total_errors = .0
    total_length = .0
    total_wer_errors = .0
    total_word_length = 0.

    if config.get('vocabulary'):
        with open(os.path.expandvars(config['vocabulary'])) as f:
            vocabulary = dict(line.split() for line in f.readlines())

        def to_words(chars):
            words = chars.split()
            words = [
                vocabulary[word] if word in vocabulary else vocabulary['<UNK>']
                for word in words
            ]
            return words

    for number, example in enumerate(it):
        if decode_only and number not in decode_only:
            continue
        print("Utterance {} ({})".format(number, example[2]), file=print_to)
        groundtruth = data.decode(example[1])
        groundtruth_text = data.pretty_print(example[1])
        costs_groundtruth, weights_groundtruth = (recognizer.analyze(
            example[0], example[1], example[1])[:2])
        weight_std_groundtruth, mono_penalty_groundtruth = weight_statistics(
            weights_groundtruth)
        total_nll += costs_groundtruth.sum()
        num_examples += 1
        print("Groundtruth:", groundtruth_text, file=print_to)
        print("Groundtruth cost:", costs_groundtruth.sum(), file=print_to)
        print("Groundtruth weight std:", weight_std_groundtruth, file=print_to)
        print("Groundtruth monotonicity penalty:",
              mono_penalty_groundtruth,
              file=print_to)
        print("Average groundtruth cost: {}".format(total_nll / num_examples),
              file=print_to)
        if nll_only:
            print_to.flush()
            continue

        before = time.time()
        outputs, search_costs = recognizer.beam_search(
            example[0],
            char_discount=search_conf['char_discount'],
            round_to_inf=search_conf['round_to_inf'],
            stop_on=search_conf['stop_on'])
        took = time.time() - before
        recognized = data.decode(outputs[0])
        recognized_text = data.pretty_print(outputs[0])
        if recognized:
            # Theano scan doesn't work with 0 length sequences
            costs_recognized, weights_recognized = (recognizer.analyze(
                example[0], example[1], outputs[0])[:2])
            weight_std_recognized, mono_penalty_recognized = weight_statistics(
                weights_recognized)
            error = min(1, wer(groundtruth, recognized))
        else:
            error = 1
        total_errors += len(groundtruth) * error
        total_length += len(groundtruth)

        if config.get('vocabulary'):
            wer_error = min(
                1, wer(to_words(groundtruth_text), to_words(recognized_text)))
            total_wer_errors += len(groundtruth) * wer_error
            total_word_length += len(groundtruth)

        if report and recognized:
            show_alignment(weights_groundtruth, groundtruth, bos_symbol=True)
            pyplot.savefig(
                os.path.join(alignments_path,
                             "{}.groundtruth.png".format(number)))
            show_alignment(weights_recognized, recognized, bos_symbol=True)
            pyplot.savefig(
                os.path.join(alignments_path,
                             "{}.recognized.png".format(number)))

        if decoded_file is not None:
            print("{} {}".format(example[2], ' '.join(recognized)),
                  file=decoded_file)

        print("Decoding took:", took, file=print_to)
        print("Beam search cost:", search_costs[0], file=print_to)
        print("Recognized:", recognized_text, file=print_to)
        if recognized:
            print("Recognized cost:", costs_recognized.sum(), file=print_to)
            print("Recognized weight std:",
                  weight_std_recognized,
                  file=print_to)
            print("Recognized monotonicity penalty:",
                  mono_penalty_recognized,
                  file=print_to)
        print("CER:", error, file=print_to)
        print("Average CER:", total_errors / total_length, file=print_to)
        if config.get('vocabulary'):
            print("WER:", wer_error, file=print_to)
            print("Average WER:",
                  total_wer_errors / total_word_length,
                  file=print_to)
        print_to.flush()

Example #9

Show file

File: main.py Project: DingKe/attention-lvcsr

def create_model(config, data,
                 load_path=None,
                 test_tag=False):
    """
    Build the main brick and initialize or load all parameters.

    Parameters
    ----------

    config : dict
        the configuration dict

    data : object of class Data
        the dataset creation object

    load_path : str or None
        if given a string, it will be used to load model parameters. Else,
        the parameters will be randomly initalized by calling
        recognizer.initialize()

    test_tag : bool
        if true, will add tag the input variables with test values

    """
    # First tell the recognizer about required data sources
    net_config = dict(config["net"])
    bottom_class = net_config['bottom']['bottom_class']
    input_dims = {
        source: data.num_features(source)
        for source in bottom_class.vector_input_sources}
    input_num_chars = {
        source: len(data.character_map(source))
        for source in bottom_class.discrete_input_sources}

    recognizer = SpeechRecognizer(
        input_dims=input_dims,
        input_num_chars=input_num_chars,
        eos_label=data.eos_label,
        num_phonemes=data.num_labels,
        name="recognizer",
        data_prepend_eos=data.prepend_eos,
        character_map=data.character_map('labels'),
        **net_config)
    if load_path:
        recognizer.load_params(load_path)
    else:
        for brick_path, attribute_dict in sorted(
                config['initialization'].items(),
                key=lambda (k, v): k.count('/')):
            for attribute, value in attribute_dict.items():
                brick, = Selector(recognizer).select(brick_path).bricks
                setattr(brick, attribute, value)
                brick.push_initialization_config()
        recognizer.initialize()

    if test_tag:
        # fails with newest theano
        # tensor.TensorVariable.__str__ = tensor.TensorVariable.__repr__
        __stream = data.get_stream("train")
        __data = next(__stream.get_epoch_iterator(as_dict=True))
        for __var in recognizer.inputs.values():
            __var.tag.test_value = __data[__var.name]
        theano.config.compute_test_value = 'warn'
    return recognizer