Example #1
def test_replicate_cdswordseg(datadir):
    sep = Separator()

    _tags = [utt for utt in codecs.open(
        os.path.join(datadir, 'tagged.txt'), 'r', encoding='utf8')
            if utt]
    _prepared = prepare(_tags, separator=sep)
    _gold = gold(_tags, separator=sep)
    _train = _tags[:200]

    model = dibs.CorpusSummary(_train)
    segmented = dibs.segment(_prepared, model)
    score = evaluate(segmented, _gold)

    # We obtained this score from the dibs version in CDSWordSeg
    # (using wordseg.prepare and wordseg.evaluate in both cases). You
    # can replicate this result in CDSWordSeg with
    # ".../CDSwordSeg/algoComp/segment.py test/data/tagged.txt -a dibs"
    expected = {
        'type_fscore': 0.2359,
        'type_precision': 0.2084,
        'type_recall': 0.2719,
        'token_fscore': 0.239,
        'token_precision': 0.3243,
        'token_recall': 0.1892,
        'boundary_all_fscore': 0.6543,
        'boundary_all_precision': 0.8377,
        'boundary_all_recall': 0.5367,
        'boundary_noedge_fscore': 0.4804,
        'boundary_noedge_precision': 0.7161,
        'boundary_noedge_recall': 0.3614}

    assert score == pytest.approx(expected, rel=1e-3)
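The examples on this page are excerpted from the wordseg test suite and omit their imports; datadir is a pytest fixture pointing at the test data directory. The snippet above assumes roughly the following (a sketch based on wordseg's public API, not copied verbatim from the test file; later examples import their own algorithm module from wordseg.algos instead of dibs):

import codecs
import os

import pytest

from wordseg.separator import Separator
from wordseg.prepare import prepare, gold
from wordseg.evaluate import evaluate
from wordseg.algos import dibs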
Example #2
def test_replicate(datadir):
    sep = Separator()

    _tags = [
        utt for utt in codecs.open(
            os.path.join(datadir, 'tagged.txt'), 'r', encoding='utf8') if utt
    ][:100]  # first 100 lines only
    _prepared = prepare(_tags, separator=sep)
    _gold = gold(_tags, separator=sep)

    segmented = puddle.segment(_prepared, nfolds=1)
    score = evaluate(segmented, _gold)

    # We obtained this score from the puddle version in CDSWordSeg
    # (using wordseg.prepare and wordseg.evaluate in both cases)
    expected = {
        'type_fscore': 0.06369,
        'type_precision': 0.1075,
        'type_recall': 0.04525,
        'token_fscore': 0.06295,
        'token_precision': 0.2056,
        'token_recall': 0.03716,
        'boundary_all_fscore': 0.4605,
        'boundary_all_precision': 1.0,
        'boundary_all_recall': 0.2991,
        'boundary_noedge_fscore': 0.02806,
        'boundary_noedge_precision': 1.0,
        'boundary_noedge_recall': 0.01423
    }

    assert score == pytest.approx(expected, rel=1e-3)
Example #3
def test_replicate(datadir):
    sep = Separator()

    _tags = [
        utt for utt in codecs.open(
            os.path.join(datadir, 'tagged.txt'), 'r', encoding='utf8') if utt
    ][:100]  # first 100 lines only
    _prepared = prepare(_tags, separator=sep)
    _gold = gold(_tags, separator=sep)

    segmented = tp.segment(_prepared)
    score = evaluate(segmented, _gold)

    # We obtained this score from the TP version in CDSWordSeg
    # (using wordseg.prepare and wordseg.evaluate in both cases)
    expected = {
        'type_fscore': 0.304,
        'type_precision': 0.2554,
        'type_recall': 0.3756,
        'token_fscore': 0.3994,
        'token_precision': 0.3674,
        'token_recall': 0.4375,
        'boundary_all_fscore': 0.7174,
        'boundary_all_precision': 0.6671,
        'boundary_all_recall': 0.776,
        'boundary_noedge_fscore': 0.6144,
        'boundary_noedge_precision': 0.557,
        'boundary_noedge_recall': 0.685
    }

    assert score == pytest.approx(expected, rel=1e-3)
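Example #3 calls tp.segment with its defaults. Example #10 below passes the threshold strategy explicitly; the equivalent call here would be (a sketch reusing the parameter shown in Example #10):

segmented = tp.segment(_prepared, threshold='relative')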
Example #4
def test_boundary_3():
    text = ['hell o']
    gold = ['h ello']

    score = evaluate(text, gold)
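    # text 'hell o' puts its one boundary after 'hell', gold 'h ello'
    # after 'h': counting the always-matching utterance edges, 2 of 3
    # boundaries agree on each side, hence the 2/3 scores; the internal
    # boundaries disagree, so the noedge scores are 0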
    expected = {
        'boundary_all_precision': 2.0 / 3.0,
        'boundary_all_recall': 2.0 / 3.0,
        'boundary_all_fscore': 2.0 / 3.0,
        'boundary_noedge_precision': 0,
        'boundary_noedge_recall': 0,
        'boundary_noedge_fscore': 0
    }

    for k, v in expected.items():
        assert score[k] == v, k
Example #5
def test_boundary_1():
    text = ['hello']
    gold = ['hello']

    score = {k: v for k, v in evaluate(text, gold).items() if 'boundary' in k}
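    # a single-word utterance has no word-internal boundary, so the
    # noedge metrics are undefined and evaluate() reports them as None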
    expected = {
        'boundary_all_precision': 1.0,
        'boundary_all_recall': 1.0,
        'boundary_all_fscore': 1.0,
        'boundary_noedge_precision': None,
        'boundary_noedge_recall': None,
        'boundary_noedge_fscore': None
    }

    for k, v in expected.items():
        assert score[k] == v, k
Example #6
def test_replicate_cdswordseg(datadir):
    sep = Separator()

    # Only the last 10 lines, for a fast test. We cannot take the
    # first 10 lines because they cause the dpseg_bugfix to correct a
    # fold (the implementation of that fix differs in CDS and wordseg,
    # so the results are not replicated exactly).
    _tags = [
        utt for utt in codecs.open(
            os.path.join(datadir, 'tagged.txt'), 'r', encoding='utf8') if utt
    ][-10:]

    _prepared = prepare(_tags, separator=sep, unit='syllable')
    _gold = gold(_tags, separator=sep)

    uni_dmcmc_conf = [
        c for c in wordseg.utils.get_config_files('dpseg') if 'uni_dmcmc' in c
    ][0]
    args = '--ngram 1 --a1 0 --b1 1 -C {}'.format(uni_dmcmc_conf)
    segmented = segment(_prepared, nfolds=5, njobs=4, args=args)
    score = evaluate(segmented, _gold)

    # We obtained these scores from the dpseg version in CDSWordSeg
    expected = {
        'type_fscore': 0.3768,
        'type_precision': 0.3939,
        'type_recall': 0.3611,
        'token_fscore': 0.3836,
        'token_precision': 0.4118,
        'token_recall': 0.359,
        'boundary_all_fscore': 0.7957,
        'boundary_all_precision': 0.8409,
        'boundary_all_recall': 0.7551,
        'boundary_noedge_fscore': 0.6415,
        'boundary_noedge_precision': 0.7083,
        'boundary_noedge_recall': 0.5862
    }

    assert score == pytest.approx(expected, rel=1e-3)
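Besides the common imports sketched after Example #1, this dpseg test reads a bundled configuration file and calls segment unqualified, so it additionally assumes roughly (again a sketch, not verbatim from the test file):

import wordseg.utils
from wordseg.algos.dpseg import segment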
Example #7
def _test_basic(text, gold, units, expected):
    pred = evaluate(text, gold, units=units)
    assert pred == pytest.approx(expected)
Example #8
def test_ipa():
    text = ['juːviː mɔː kʊkɪz ']
    gold = ['juː viː mɔː kʊkɪz']
    evaluate(text, gold)
Example #9
def test_gold_on_gold():
    gold = ['the dog bites the dog']
    for v in evaluate(gold, gold).values():
        assert v == 1.0
Example #10
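This last snippet starts mid-script: text and prepared must already be defined. A minimal setup sketch (the file name is an assumption; the imports follow those used throughout this page):

from wordseg.prepare import prepare, gold
from wordseg.evaluate import evaluate
from wordseg.algos import baseline, tp, puddle, dpseg, ag, dibs

# load the input text, one utterance per line
text = open('tagged.txt', 'r').readlines()

# prepare the input for segmentation
prepared = list(prepare(text))

Note that the first line below rebinds the name gold from the imported function to the resulting list; this works because the function is only called once.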
# generate the gold text
gold = list(gold(text))

# segment the prepared text with different algorithms
segmented_baseline = baseline.segment(prepared, probability=0.2)
segmented_tp = tp.segment(prepared, threshold='relative')
segmented_puddle = puddle.segment(prepared, njobs=4, window=2)
segmented_dpseg = dpseg.segment(prepared, nfolds=1, args='--randseed 1')
segmented_ag = ag.segment(prepared, nruns=4, njobs=4, args='-n 100')

# we must provide a trained model to dibs (with stats on diphones)
model_dibs = dibs.CorpusSummary(text)
segmented_dibs = dibs.segment(prepared, model_dibs)

# evaluate them against the gold file
eval_baseline = evaluate(segmented_baseline, gold, units=prepared)
eval_tp = evaluate(segmented_tp, gold, units=prepared)
eval_puddle = evaluate(segmented_puddle, gold, units=prepared)
eval_dpseg = evaluate(segmented_dpseg, gold, units=prepared)
eval_ag = evaluate(segmented_ag, gold, units=prepared)
eval_dibs = evaluate(segmented_dibs, gold, units=prepared)


# a little function to display a score with 4 significant digits
def display(score):
    if score is None:
        return 'None'
    else:
        return '%.4g' % score
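A natural follow-up, not in the original snippet, is to print the six evaluations side by side with display; this sketch assumes the eval_* dictionaries computed above all share the same metric keys:

# display the scores as a table, one metric per row
print('{:<26} {:>9} {:>9} {:>9} {:>9} {:>9} {:>9}'.format(
    'score', 'baseline', 'tp', 'puddle', 'dpseg', 'ag', 'dibs'))
for metric in sorted(eval_baseline.keys()):
    print('{:<26} {:>9} {:>9} {:>9} {:>9} {:>9} {:>9}'.format(
        metric,
        display(eval_baseline[metric]),
        display(eval_tp[metric]),
        display(eval_puddle[metric]),
        display(eval_dpseg[metric]),
        display(eval_ag[metric]),
        display(eval_dibs[metric])))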