def test_compute_error_rate(
        temp_file_1_name, temp_file_2_name, temp_file_3_name):
    '''compute_error_rate reports the expected error rate and accuracy

    Reference transcripts are written to the first temp file and hypothesis
    transcripts to the second. The command's output is read back from the
    third.
    '''
    ref_rspecifier = 'ark:' + temp_file_1_name
    hyp_rspecifier = 'ark:' + temp_file_2_name
    transcripts = (
        (ref_rspecifier, (
            ('A', ('lorem', 'ipsum', 'dolor', 'sit', 'amet')),
            ('B', ('consectetur', 'adipiscing', 'elit')),
        )),
        (hyp_rspecifier, (
            ('A', ('laura', 'ipsum', 'dollars', 'sit', 'down', 'amet')),
            ('B', ('consecutive', 'elite')),
        )),
    )
    for specifier, utterances in transcripts:
        with kaldi_io.open(specifier, 'tv', 'w') as token_writer:
            for utt_id, tokens in utterances:
                token_writer.write(utt_id, tokens)
    # Differences (reference -> hypothesis):
    #   A : lorem -> laura, dolor -> dollars, (nothing) -> down
    #   B : consectetur -> consecutive, adipiscing -> (nothing),
    #       elit -> elite
    # Over 8 reference tokens that is 6 errors when the insertion counts
    # and 5 when it does not
    expected_by_flags = (
        ([], 'Error rate: 75.00%'),
        (
            ['--include-inserts-in-cost=false', '--report-accuracy=true'],
            'Accuracy: {:.2f}%'.format((1 - 5 / 8) * 100),
        ),
    )
    for extra_flags, expected in expected_by_flags:
        status = command_line.compute_error_rate(
            [ref_rspecifier, hyp_rspecifier, temp_file_3_name] + extra_flags)
        assert status == 0
        with open(temp_file_3_name) as report:
            report_text = report.read()
        assert expected in report_text
Exemple #2
0
def test_compute_error_rate(temp_file_1_name, temp_file_2_name,
                            temp_file_3_name):
    '''Exercise compute_error_rate with and without insertions in the cost

    The first temp file receives the reference tokens, the second the
    hypothesis tokens, and the third is where the command writes its output.
    '''
    ref_spec = 'ark:' + temp_file_1_name
    hyp_spec = 'ark:' + temp_file_2_name

    def run_and_read(*flags):
        # run the command, insist on a clean exit, and hand back its output
        exit_code = command_line.compute_error_rate(
            [ref_spec, hyp_spec, temp_file_3_name] + list(flags))
        assert exit_code == 0
        with open(temp_file_3_name) as result_file:
            return result_file.read()

    with kaldi_io.open(ref_spec, 'tv', 'w') as references:
        references.write('A', ('lorem', 'ipsum', 'dolor', 'sit', 'amet'))
        references.write('B', ('consectetur', 'adipiscing', 'elit'))
    with kaldi_io.open(hyp_spec, 'tv', 'w') as hypotheses:
        hypotheses.write(
            'A', ('laura', 'ipsum', 'dollars', 'sit', 'down', 'amet'))
        hypotheses.write('B', ('consecutive', 'elite'))

    # Reference -> hypothesis differences
    #   A : lorem -> laura, dolor -> dollars, (nothing) -> down
    #   B : consectetur -> consecutive, adipiscing -> (nothing),
    #       elit -> elite
    # 8 reference tokens: 6 errors with the insertion, 5 without it
    default_report = run_and_read()
    assert 'Error rate: 75.00%' in default_report

    accuracy_report = run_and_read(
        '--include-inserts-in-cost=false',
        '--report-accuracy=true',
    )
    expected_accuracy = 'Accuracy: {:.2f}%'.format((1 - 5 / 8) * 100)
    assert expected_accuracy in accuracy_report
def elicit_warning(filename, threaded=False):
    '''Helper to elicit a natural warning from kaldi

    Writes a single entry, keyed ``'zz'`` and holding an infinite value, to a
    text archive at `filename`, then reads that entry back.

    Parameters
    ----------
    filename : str
        Path of the archive to write and then read.
    threaded : bool
        If set, ``,bg`` is added to the options of the read specifier
        (presumably a background-threaded read; confirm against kaldi).
    '''
    writer = io.open('ark,t:{}'.format(filename), 'bv', 'w')
    try:
        # np.inf, not np.infty: the latter alias was removed in NumPy 2.0
        writer.write('zz', [np.inf])
    finally:
        # release the archive even if the write fails
        writer.close()
    reader = io.open(
        'ark,t{}:{}'.format(',bg' if threaded else '', filename), 'bv')
    try:
        next(reader)
    finally:
        reader.close()
Exemple #4
0
def elicit_warning(filename, threaded=False):
    '''Helper to elicit a natural warning from kaldi

    A text archive is created at `filename` with one entry (key ``'zz'``,
    a single infinite value). The entry is then read back from that archive.

    Parameters
    ----------
    filename : str
        Path of the archive to write and then read.
    threaded : bool
        If set, the read specifier gains the ``,bg`` option (presumably a
        background-threaded read; confirm against kaldi).
    '''
    writer = io.open('ark,t:{}'.format(filename), 'bv', 'w')
    try:
        # the np.infty alias no longer exists as of NumPy 2.0; np.inf is the
        # same value under every NumPy version
        writer.write('zz', [np.inf])
    finally:
        writer.close()
    reader = io.open('ark,t{}:{}'.format(',bg' if threaded else '', filename),
                     'bv')
    try:
        next(reader)
    finally:
        # always release the archive, even when reading raises
        reader.close()
Exemple #5
0
def _write_pickle_to_table_empty(wspecifier, logger):
    '''Special case when pickle file(s) was/were empty

    Opens `wspecifier` for writing and closes it again without writing any
    entries, then warns that nothing was written.

    Parameters
    ----------
    wspecifier : str
        Specifier of the table to open.
    logger : logging.Logger
        Receives the progress, warning and error messages.

    Returns
    -------
    int
        1 if the table could not be opened (the error is logged), else 0.
    '''
    logger.info('Opening {}'.format(wspecifier))
    try:
        # doesn't matter what type we choose; we're not writing anything
        writer = kaldi_io.open(wspecifier, 'bm', 'w')
    except IOError as error:
        # str(error) instead of error.message: exceptions have no `message`
        # attribute on Python 3
        logger.error(str(error), exc_info=True)
        return 1
    # close explicitly rather than leaving the handle to garbage collection
    writer.close()
    # Logger.warn is a deprecated alias that newer Pythons no longer provide
    logger.warning('No entries were written (pickle file(s) was/were empty)')
    return 0
def test_write_table_to_pickle(values, temp_file_1_name, temp_file_2_name):
    '''write_table_to_pickle dumps every table entry as a (key, value) pair

    `values` is written to a kaldi archive in the first temp file under the
    keys ``'0'``, ``'1'``, ... The command converts that archive into the
    second temp file, whose pickled pairs are compared against `values`.
    '''
    if len(values):
        kaldi_dtype = kaldi_io.util.infer_kaldi_data_type(values[0]).value
    else:
        # nothing to infer the type from; any type will do for an empty table
        kaldi_dtype = 'bm'
    with kaldi_io.open('ark:' + temp_file_1_name, kaldi_dtype, 'w') as writer:
        for num, value in enumerate(values):
            writer.write(str(num), value)
    ret_code = command_line.write_table_to_pickle(
        ['ark:' + temp_file_1_name, temp_file_2_name, '-i', kaldi_dtype])
    assert ret_code == 0
    num_entries = 0
    # the with-block closes the pickle file even when an assertion fails
    with open(temp_file_2_name, 'rb') as pickle_file:
        try:
            while True:
                key, value = pickle.load(pickle_file)
                # keys are the entry's index, so the last key seen gives the
                # number of entries read so far
                num_entries = int(key) + 1
                expected = values[num_entries - 1]
                if hasattr(expected, 'dtype'):
                    # array-like: compare numerically
                    assert np.allclose(value, expected)
                else:
                    assert value == expected
        except EOFError:
            # no more pickled pairs
            pass
    assert num_entries == len(values)
def test_write_pickle_to_table(values, temp_file_1_name, temp_file_2_name):
    '''write_pickle_to_table turns pickled (key, value) pairs into a table

    `values` is pickled into the first temp file as pairs keyed ``'0'``,
    ``'1'``, ... The command converts that file into a kaldi archive in the
    second temp file, whose entries are compared against `values`.
    '''
    if len(values):
        kaldi_dtype = kaldi_io.util.infer_kaldi_data_type(values[0]).value
    else:
        # nothing to infer the type from; any type will do for an empty table
        kaldi_dtype = 'bm'
    with open(temp_file_1_name, 'wb') as pickle_file:
        for num, value in enumerate(values):
            pickle.dump((str(num), value), pickle_file)
    ret_code = command_line.write_pickle_to_table(
        [temp_file_1_name, 'ark:' + temp_file_2_name, '-o', kaldi_dtype])
    assert ret_code == 0
    num_entries = 0
    # the with-block closes the reader even when an assertion fails
    with kaldi_io.open(
            'ark:' + temp_file_2_name, kaldi_dtype, 'r') as kaldi_reader:
        for key, value in kaldi_reader.items():
            # keys are the entry's index, so the last key seen gives the
            # number of entries read so far
            num_entries = int(key) + 1
            expected = values[num_entries - 1]
            if hasattr(expected, 'dtype'):
                # array-like: compare numerically
                assert np.allclose(value, expected)
            else:
                assert value == expected
    assert num_entries == len(values)
Exemple #8
0
def _write_pickle_to_table_key_value(options, logger):
    '''write_pickle_to_table when both key_in and value_in have been specified

    Keys are unpickled one at a time from ``options.key_in`` and values from
    ``options.value_in``. A path ending in ``.gz`` is opened with gzip. Each
    key/value pair is written to the table opened from ``options.wspecifier``
    with type ``options.out_type``.

    Parameters
    ----------
    options
        Parsed arguments providing ``key_in``, ``value_in``, ``wspecifier``
        and ``out_type``.
    logger : logging.Logger
        Receives progress and error messages.

    Returns
    -------
    int
        0 on success. 1 if an input cannot be opened or unpickled, the table
        cannot be opened or written, or the number of keys differs from the
        number of values. When both inputs are empty, the result of
        ``_write_pickle_to_table_empty`` is returned.
    '''
    # NOTE: unpickling can execute arbitrary code; only use trusted inputs

    def open_pickle(path):
        # pickle streams are bytes, so the key file is read in binary mode
        # too (text mode makes pickle.load fail on Python 3)
        if path.endswith('.gz'):
            import gzip
            return gzip.open(path, 'rb')
        return open(path, 'rb')

    exhausted = object()  # sentinel: no more pickled objects in a stream

    def load_next(handle):
        try:
            return pickle.load(handle)
        except EOFError:
            return exhausted

    def fail(error):
        # str(error) rather than error.message, which Python 3 lacks
        logger.error(str(error), exc_info=True)
        return 1

    def report_mismatch(num_keys, num_values):
        logger.error(
            'Number of keys ({}) and values ({}) do not match'.format(
                num_keys, num_values))
        return 1

    value_in = key_in = writer = None
    try:
        try:
            logger.info('Opening {}'.format(options.value_in))
            value_in = open_pickle(options.value_in)
            logger.info('Opening {}'.format(options.key_in))
            key_in = open_pickle(options.key_in)
        except IOError as error:
            return fail(error)
        try:
            value = load_next(value_in)
            key = load_next(key_in)
        except (IOError, pickle.UnpicklingError) as error:
            return fail(error)
        if key is exhausted and value is exhausted:
            return _write_pickle_to_table_empty(options.wspecifier, logger)
        if key is exhausted or value is exhausted:
            return report_mismatch(
                int(key is not exhausted), int(value is not exhausted))
        out_type = options.out_type
        try:
            logger.info('Opening {}'.format(options.wspecifier))
            writer = kaldi_io.open(options.wspecifier, out_type, 'w')
        except IOError as error:
            return fail(error)
        if out_type.is_floating_point:
            float_type = np.float64 if out_type.is_double else np.float32
        else:
            float_type = None
        num_entries = 0
        try:
            while True:
                if float_type is not None:
                    try:
                        value = value.astype(float_type, copy=False)
                    except AttributeError:
                        pass  # will happen implicitly
                writer.write(key, value)
                num_entries += 1
                if num_entries % 10 == 0:
                    logger.info('Processed {} entries'.format(num_entries))
                logger.log(9, 'Processed key {}'.format(key))
                # advance both streams together so that a stream that ends
                # early is noticed instead of silently dropping the extra
                # key or value
                key = load_next(key_in)
                value = load_next(value_in)
                if key is exhausted or value is exhausted:
                    break
        except (IOError, ValueError, TypeError,
                pickle.UnpicklingError) as error:
            return fail(error)
        if key is not exhausted or value is not exhausted:
            return report_mismatch(
                num_entries + int(key is not exhausted),
                num_entries + int(value is not exhausted))
    finally:
        # every exit path releases whatever was opened
        for handle in (writer, key_in, value_in):
            if handle is not None:
                handle.close()
    logger.info("Wrote {} entries".format(num_entries))
    return 0
Exemple #9
0
def _write_pickle_to_table_value_only(options, logger):
    '''write_pickle_to_table when only value_in has been specified

    ``options.value_in`` (opened with gzip if its name ends in ``.gz``) is
    read as a sequence of pickled ``(key, value)`` pairs. Each pair is
    written to the table opened from ``options.wspecifier`` with type
    ``options.out_type``.

    Parameters
    ----------
    options
        Parsed arguments providing ``value_in``, ``wspecifier`` and
        ``out_type``.
    logger : logging.Logger
        Receives progress and error messages.

    Returns
    -------
    int
        0 on success; 1 if the input cannot be opened or unpickled, or the
        table cannot be opened or written. When the input is empty, the
        result of ``_write_pickle_to_table_empty`` is returned.
    '''
    # NOTE: unpickling can execute arbitrary code; only use trusted inputs
    from six.moves import cPickle as pickle

    def fail(error):
        # str(error) rather than error.message, which Python 3 lacks
        logger.error(str(error), exc_info=True)
        return 1

    value_in = writer = None
    try:
        try:
            logger.info('Opening {}'.format(options.value_in))
            if options.value_in.endswith('.gz'):
                import gzip
                value_in = gzip.open(options.value_in, 'rb')
            else:
                value_in = open(options.value_in, 'rb')
        except IOError as error:
            return fail(error)
        try:
            key, value = pickle.load(value_in)
        except EOFError:
            return _write_pickle_to_table_empty(options.wspecifier, logger)
        except (IOError, ValueError, TypeError,
                pickle.UnpicklingError) as error:
            # same failures the main loop guards against, e.g. a pickled
            # object that is not a (key, value) pair
            return fail(error)
        out_type = options.out_type
        try:
            # the function's own logger, not the root logger
            logger.info('Opening {}'.format(options.wspecifier))
            writer = kaldi_io.open(options.wspecifier, out_type, 'w')
        except IOError as error:
            return fail(error)
        if out_type.is_floating_point:
            float_type = np.float64 if out_type.is_double else np.float32
        else:
            float_type = None
        num_entries = 0
        try:
            while True:
                if float_type is not None:
                    try:
                        value = value.astype(float_type, copy=False)
                    except AttributeError:
                        # not an array; left as-is for the writer to handle
                        pass
                writer.write(key, value)
                num_entries += 1
                if num_entries % 10 == 0:
                    logger.info('Processed {} entries'.format(num_entries))
                logger.log(9, 'Processed key {}'.format(key))
                key, value = pickle.load(value_in)
        except EOFError:
            # end of input: everything has been written
            pass
        except (IOError, ValueError, TypeError,
                pickle.UnpicklingError) as error:
            return fail(error)
    finally:
        # every exit path releases whatever was opened
        for handle in (writer, value_in):
            if handle is not None:
                handle.close()
    logger.info("Wrote {} entries".format(num_entries))
    return 0
Exemple #10
0
def write_table_to_pickle(args=None):
    '''Write a kaldi table to pickle file(s)

    The inverse is write_pickle_to_table

    Each table entry is pickled in the order it is read. When a key output
    file is given, values go to the value file and keys to the key file.
    Otherwise ``(key, value)`` pairs go to the value file. An output path
    ending in ``.gz`` is written with gzip.

    Parameters
    ----------
    args : list of str, optional
        Command-line arguments, handed to the argument parser.

    Returns
    -------
    int
        The parser's exit code if argument parsing exits, 1 if a file cannot
        be opened or an entry cannot be converted or written, else 0.
    '''
    logger = logging.getLogger(sys.argv[0])
    if not logger.handlers:
        logger.addHandler(logging.StreamHandler())
    register_logger_for_kaldi(logger)
    try:
        options = _write_table_to_pickle_parse_args(args, logger)
    except SystemExit as ex:
        return ex.code
    out_type = options.out_type
    if out_type is None:
        if options.in_type.is_floating_point:
            if options.in_type.is_double:
                out_type = np.float64
            else:
                out_type = np.float32
        else:
            # np.str was only an alias of the builtin and has been removed
            # from NumPy
            out_type = str
    from six.moves import cPickle as pickle

    def open_for_pickle(path):
        # pickle emits bytes, so every output (keys included) is binary
        if path.endswith('.gz'):
            import gzip
            return gzip.open(path, 'wb')
        return open(path, 'wb')

    reader = value_out = key_out = None
    num_entries = 0
    try:
        try:
            logger.info('Opening {}'.format(options.rspecifier))
            reader = kaldi_io.open(options.rspecifier, options.in_type, 'r')
            logger.info('Opening {}'.format(options.value_out))
            value_out = open_for_pickle(options.value_out)
            if options.key_out:
                logger.info('Opening {}'.format(options.key_out))
                key_out = open_for_pickle(options.key_out)
        except IOError as error:
            # str(error) rather than error.message, which Python 3 lacks
            logger.error(str(error), exc_info=True)
            return 1
        # string-typed output is pickled as read; anything else is cast
        needs_cast = not np.issubdtype(out_type, str)
        try:
            for key, value in reader.items():
                num_entries += 1
                if needs_cast:
                    value = value.astype(out_type)
                if key_out is not None:
                    pickle.dump(value, value_out)
                    pickle.dump(key, key_out)
                else:
                    pickle.dump((key, value), value_out)
                if num_entries % 10 == 0:
                    logger.info('Processed {} entries'.format(num_entries))
                logger.log(9, 'Processed key {}'.format(key))
        except (IOError, ValueError) as error:
            logger.error(str(error), exc_info=True)
            return 1
    finally:
        # every exit path releases whatever was opened, reader included
        for handle in (value_out, key_out, reader):
            if handle is not None:
                handle.close()
    if num_entries == 0:
        # Logger.warn is a deprecated alias that newer Pythons lack
        logger.warning("No entries were written (table was empty)")
    else:
        logger.info("Wrote {} entries".format(num_entries))
    return 0