Example #1
0
def evaluate_rule_based(input_file='silabe.test.xml'):
    """Score the rule-based syllabifier `syll` against a reference file.

    Reference strings are the second element of every item produced by
    `syllabifications(input_file)`, stripped of surrounding whitespace.
    Each reference has its '-' characters removed and is then handed to
    `syll` to obtain the prediction.

    Returns a 3-tuple:
      * the mean of exact string matches between reference and prediction,
      * `accuracy_score` over the per-split labels binarized as `label == 0`,
      * `f1_score` over the same binarized labels.
    """
    gold = [entry.strip() for _, entry in syllabifications(input_file)]
    guessed = [syll(entry.replace('-', '')) for entry in gold]

    # Word level: a word counts only when the whole string is reproduced.
    exact = [ref == hyp for ref, hyp in zip(gold, guessed)]
    word_accuracy = np.mean(exact)

    # Split level: walk the splits of each reference/prediction pair in
    # lockstep and keep only the label (third field) from each side.
    pairs = []
    for ref, hyp in zip(gold, guessed):
        for (_, _, ref_label), (_, _, hyp_label) in zip(all_splits(ref),
                                                        all_splits(hyp)):
            pairs.append((ref_label, hyp_label))

    # Transpose the pairs into two parallel sequences, then binarize.
    ref_labels, hyp_labels = zip(*pairs)
    hyph_true = np.array(ref_labels) == 0
    hyph_pred = np.array(hyp_labels) == 0

    split_accuracy = accuracy_score(hyph_true, hyph_pred)
    split_f1 = f1_score(hyph_true, hyph_pred)
    return (word_accuracy, split_accuracy, split_f1)
Example #2
0
def evaluate_rule_based(input_file='silabe.test.xml'):
    """Evaluate `syll` on the syllabifications stored in `input_file`.

    The stripped second field of each `syllabifications(input_file)` item is
    used as the reference.  The prediction for a reference is
    `syll(reference.replace('-', ''))`.

    Returns `(word_accuracy, split_accuracy, split_f1)` where
    `word_accuracy` is the mean of exact reference/prediction matches and the
    two split scores come from `accuracy_score` / `f1_score` applied to the
    split labels after comparing them with 0.
    """
    references = []
    for _, text in syllabifications(input_file):
        references.append(text.strip())
    predictions = [syll(ref.replace('-', '')) for ref in references]

    matches = [expected == got
               for expected, got in zip(references, predictions)]
    word_accuracy = np.mean(matches)

    # One (true label, predicted label) pair per aligned split; only the
    # third field of each split tuple is used.
    label_pairs = [
        (true_label, pred_label)
        for expected, got in zip(references, predictions)
        for (_, _, true_label), (_, _, pred_label) in zip(
            all_splits(expected), all_splits(got))
    ]

    true_column, pred_column = zip(*label_pairs)
    is_zero_true = np.array(true_column) == 0
    is_zero_pred = np.array(pred_column) == 0

    return (
        word_accuracy,
        accuracy_score(is_zero_true, is_zero_pred),
        f1_score(is_zero_true, is_zero_pred),
    )
Example #3
0
def word_to_feature_dict(word, stress, size=2, unigram=False):
    """Build per-split and per-character features plus label arrays for a word.

    For every `(left, right, label)` produced by `all_splits(word)` one
    feature dict is created:

    * `unigram=True`: single characters from the last `size` characters of
      `left` (keys "-1", "-2", ...) and the first `size` characters of
      `right` (keys "1", "2", ...).
    * `unigram=False`: substrings of length 1..`size` taken inside the
      `size`-wide window on each side, keyed "start-end" (e.g. "1-2" on the
      right, "-1--2" on the left).  A substring is stored only when it has
      the full requested length.

    Returns a 4-tuple:
      1. the list of per-split feature dicts,
      2. `_build_feature_dict(word_without_dashes, k, size, size)` for every
         character index `k` of the word with '-' removed,
      3. the split labels as an int array shifted by +2,
      4. `stress` as an int array.
    """
    split_features = []
    split_labels = []
    for left, right, label in all_splits(word):
        left_len = len(left)
        split_labels.append(label)
        features = {}
        if unigram:
            # NOTE(review): the left window is enumerated left-to-right, so
            # key "-1" is the first character of the window rather than the
            # one adjacent to the split -- confirm this is intended.
            for offset, char in enumerate(left[-size:]):
                features[str(-1 - offset)] = char
            for offset, char in enumerate(right[:size]):
                features[str(1 + offset)] = char
        else:
            for k in xrange(size):
                ngram_len = k + 1
                for i in xrange(size - k):
                    right_feature = right[i:i + k + 1]
                    left_feature = left[left_len - i - k - 1:left_len - i]
                    # Windows truncated by the word boundary are skipped.
                    if len(right_feature) == ngram_len:
                        features["%s-%s" % (i + 1, i + k + 1)] = right_feature
                    if len(left_feature) == ngram_len:
                        features["%s-%s" % (-i - 1, -i - k - 1)] = left_feature
        split_features.append(features)

    word_stripped = word.replace("-", "")
    char_features = [
        _build_feature_dict(word_stripped, position, size, size)
        for position in xrange(len(word_stripped))
    ]
    # Disabled alternative for the labels: (np.array(y) == 0).astype(int)
    shifted_labels = np.array(split_labels, dtype=int) + 2
    stress_array = np.array(stress, dtype=int)
    return (split_features, char_features, shifted_labels, stress_array)
Example #4
0
def word_to_feature_dict(word, stress, size=2, unigram=False):
    """Turn a word into split features, character features and label arrays.

    Each `(left, right, label)` from `all_splits(word)` contributes one
    feature dict and one label.  With `unigram=True` the dict holds single
    characters from the last `size` characters of `left` (keys "-1", "-2",
    ...) and the first `size` characters of `right` (keys "1", "2", ...).
    Otherwise it holds every full-length substring of length 1..`size`
    within the `size`-wide window on each side, keyed "start-end".

    Returns `(split_dicts, char_dicts, labels, stress_array)`:
    `char_dicts` has one `_build_feature_dict(stripped, k, size, size)` per
    character index of the word with '-' removed, `labels` is the int label
    array plus 2, and `stress_array` is `stress` converted to an int array.
    """
    split_dicts = []
    labels = []
    for left, right, label in all_splits(word):
        n_left = len(left)
        labels.append(label)
        if unigram:
            # Characters inside the window, keyed by signed position.
            # NOTE(review): on the left side the key follows enumeration
            # order of `left[-size:]`, not distance from the split.
            features = {str(-1 - pos): ch
                        for pos, ch in enumerate(left[-size:])}
            features.update({str(1 + pos): ch
                             for pos, ch in enumerate(right[:size])})
        else:
            features = {}
            for k in xrange(size):
                for i in xrange(size - k):
                    right_piece = right[i:i + k + 1]
                    left_piece = left[n_left - i - k - 1:n_left - i]
                    # Keep a piece only if the slice was not cut short.
                    if len(right_piece) == k + 1:
                        right_key = '%s-%s' % (i + 1, i + k + 1)
                        features[right_key] = right_piece
                    if len(left_piece) == k + 1:
                        left_key = '%s-%s' % (-i - 1, -i - k - 1)
                        features[left_key] = left_piece
        split_dicts.append(features)

    stripped = word.replace('-', '')
    char_dicts = []
    for index in xrange(len(stripped)):
        char_dicts.append(_build_feature_dict(stripped, index, size, size))

    # Disabled alternative for the labels: (np.array(y) == 0).astype(int)
    return (
        split_dicts,
        char_dicts,
        np.array(labels, dtype=int) + 2,
        np.array(stress, dtype=int),
    )
Example #5
0
def training_instances(syls):
    """Lazily yield `(left, right, label)` for every split of every item.

    Each element of `syls` is stripped and passed to `all_splits`; the two
    string parts of every resulting split are converted with `unicode`
    while the label is passed through unchanged.
    """
    for raw in syls:
        cleaned = raw.strip()
        for split in all_splits(cleaned):
            left, right, label = split
            yield unicode(left), unicode(right), label
Example #6
0
def training_instances(syls):
    """Generate one `(unicode(left), unicode(right), label)` triple per split.

    Splits come from `all_splits` applied to each stripped element of
    `syls`, in input order.  Nothing is computed until the generator is
    iterated.
    """
    every_split = (
        split
        for item in syls
        for split in all_splits(item.strip())
    )
    for left, right, label in every_split:
        yield unicode(left), unicode(right), label