Example #1
0
def main_swap(args):
    """Swap the odd-indexed pattern layers of two projects and re-test both.

    Loads the projects stored at ``args.project`` and ``args.project2``,
    exchanges every layer at an odd zero-based index, re-runs ``do_test`` on
    each project against its own dictionary, and saves both projects only
    when ``args.commit`` is truthy.

    Returns 0. Raises ValueError when the two projects have a different
    number of layers.
    """
    first_path = args.project
    second_path = args.project2
    print('Swapping inhibiting layers between', first_path, 'and',
          second_path)

    project = Project.load(first_path)
    project2 = Project.load(second_path)

    layer_count = len(project.patternset)
    if layer_count != len(project2.patternset):
        raise ValueError(
            'You can only swap layers between projects with same number of layers!'
        )

    # Only every second layer, starting at index 1, changes hands.
    for index in range(1, layer_count, 2):
        incoming = project2.patternset[index]
        outgoing = project.patternset[index]
        project.patternset[index] = incoming
        project2.patternset[index] = outgoing

    # Refresh the stored error counters of each project after the swap.
    for path, proj in ((first_path, project), (second_path, project2)):
        print('Performance of', path)
        proj.missed, proj.false = do_test(proj, proj.dictionary)

    if not args.commit:
        print('...Projects NOT changed (use --commit flag to save changes)')
    else:
        project.save(first_path)
        project2.save(second_path)
        print('...Committed')

    print()
    return 0
Example #2
0
def main_swap(args):
    """Exchange alternating pattern layers between two projects.

    The projects at ``args.project`` and ``args.project2`` must hold the same
    number of layers (otherwise ValueError is raised). Layers at zero-based
    indices 1, 3, 5, ... are swapped, both projects are re-evaluated with
    ``do_test`` on their own dictionaries, and the results are written back
    to disk only when ``args.commit`` is set. Returns 0.
    """
    path_a, path_b = args.project, args.project2
    print('Swapping inhibiting layers between', path_a, 'and', path_b)

    project = Project.load(path_a)
    project2 = Project.load(path_b)

    num_layers = len(project.patternset)
    if num_layers != len(project2.patternset):
        raise ValueError('You can only swap layers between projects with same number of layers!')

    for pos in range(1, num_layers, 2):
        from_second = project2.patternset[pos]
        from_first = project.patternset[pos]
        project.patternset[pos] = from_second
        project2.patternset[pos] = from_first

    # Re-test each project so its missed/false counters match the new layers.
    print('Performance of', path_a)
    project.missed, project.false = do_test(project, project.dictionary)
    print('Performance of', path_b)
    project2.missed, project2.false = do_test(project2, project2.dictionary)

    if not args.commit:
        print('...Projects NOT changed (use --commit flag to save changes)')
    else:
        project.save(path_a)
        project2.save(path_b)
        print('...Committed')

    print()
    return 0
Example #3
0
def main_new(args):
    """Create a new project file from a dictionary, then show it.

    Prints a message and returns -1 when the project file already exists or
    when the dictionary file is missing. Margins are parsed from
    ``args.margins`` when it is not None; otherwise they are computed from the
    loaded dictionary. On success the project is saved to ``args.project``
    and the return value is that of ``main_show(args)``.
    """
    project_path = args.project
    dictionary_path = args.dictionary
    print('Creating new project', project_path, 'from dictionary',
          dictionary_path)

    # Never overwrite an existing project.
    if os.path.exists(project_path):
        print(
            'Project file already exists! Use different name or delete old project first. File %s'
            % project_path)
        return -1

    if not os.path.exists(dictionary_path):
        print('Dictionary file not found', dictionary_path)
        return -1

    dictionary = Dictionary.load(dictionary_path)

    if args.margins is not None:
        margins = Margins.parse(args.margins)
    else:
        print('Automatically computing hyphenation margins from dictionary')
        margins = dictionary.compute_margins()

    Project(dictionary, margins).save(project_path)

    return main_show(args)
Example #4
0
def main_show(args):
    """Print a human-readable summary of the project at ``args.project``.

    Reports creation/modification stamps, margins, dictionary size, the
    weighted hyphen/missed/false counters and, for every layer, its kind,
    pattern count and the range/selector it was trained with.

    Returns 0 on success, or -1 (after printing a message) when the project
    file does not exist.
    """
    project_path = args.project
    if not os.path.exists(project_path):
        print('Project file not found:', project_path)
        return -1

    project = Project.load(project_path)

    print('Project file', project_path)
    print('\tcreated:', project.created)
    print('\tlast modified:', project.modified)
    print('\tmargins:', project.margins)
    print('\tdictionary size:', len(project.dictionary.keys()))
    #if project.ignore_weights:
    #    print('\tdictionary weights were ignored (-i flag active)')
    print('\ttotal hyphens: (weighted)', project.total_hyphens)
    print('\ttotal missed : (weighted)', project.missed, percent(project.missed, project.total_hyphens))
    print('\ttotal false  : (weighted)', project.false, percent(project.false, project.total_hyphens))
    print('\tnumber of pattern levels:', len(project.patternset))

    # Levels are reported one-based; odd levels are the hyphenating ones.
    for level, layer in enumerate(project.patternset, 1):
        if level % 2:
            print(level, 'HYPHENATING patternset, num patterns:', len(layer))
        else:
            print(level, 'INHIBITING patternset, num patterns:', len(layer))
        print('\tTrained with: range %r, selector %r' % (layer.patlen_range, layer.selector))

    print()
    return 0
Example #5
0
def main_train(args):
    """Train one additional pattern layer on a project and report its errors.

    Loads the project at ``args.project``, parses ``args.range`` and
    ``args.selector``, trains a new layer with them and prints the project's
    missed/false counters (each with a percentage of the hyphen total read
    before training). The project is saved only when ``args.commit`` is
    truthy. Returns 0.
    """
    print('Training project', args.project, 'using range', args.range, 'and selector', args.selector)

    project = Project.load(args.project)

    # Layer kinds alternate: an odd number of existing layers means the layer
    # about to be trained sits on an even (inhibiting) level.
    next_level = len(project.patternset) + 1
    if next_level % 2 == 0:
        # Fixed typo in the banner (was 'INHIBINTING').
        print('Training INHIBITING pattern layer (level=%s)' % next_level)
    else:
        print('Training HYPHENATION pattern layer (level=%s)' % next_level)

    patlen_rng = Range.parse(args.range)
    selector = Selector.parse(args.selector)

    print('\tpattern lengths:', patlen_rng)
    print('\tselector:', args.selector)

    # Captured before training, exactly as the original did.
    total_hyphens = project.total_hyphens

    project.train_new_layer(patlen_rng, selector)

    missed, false = project.missed, project.false

    print('Missed (weighted):', missed, percent(missed, total_hyphens))
    print('False (weighted):', false, percent(false, total_hyphens))

    if args.commit:
        project.save(args.project)
        print('...Committed!')
    else:
        print('...Projects NOT changed (use --commit flag to save changes)')

    print()
    return 0
Example #6
0
def main_show(args):
    """Print a summary of the project stored at ``args.project``.

    Shows the created/modified stamps, margins, dictionary size, hyphen and
    non-hyphen totals, the missed counter (as a share of hyphens), the false
    counter (as a share of non-hyphens) and one entry per pattern layer.

    Returns 0 on success, or -1 (after printing a message) when the project
    file does not exist.
    """
    project_path = args.project
    if not os.path.exists(project_path):
        print('Project file not found:', project_path)
        return -1

    project = Project.load(project_path)

    print('Project file', project_path)
    print('\tcreated:', project.created)
    print('\tlast modified:', project.modified)
    print('\tmargins:', project.margins)
    print('\tdictionary size:', len(project.dictionary.keys()))
    #if project.ignore_weights:
    #    print('\tdictionary weights were ignored (-i flag active)')
    print('\ttotal hyphens: (weighted)', project.total_hyphens)
    print('\ttotal non-hyphens: ', project.total_nonhyphens)
    print('\ttotal missed : (weighted)', project.missed,
          percent(project.missed, project.total_hyphens))
    print('\ttotal false  : (weighted)', project.false,
          percent(project.false, project.total_nonhyphens))
    print('\tnumber of pattern levels:', len(project.patternset))

    # One-based level numbers: odd levels hyphenate, even levels inhibit.
    for level, layer in enumerate(project.patternset, 1):
        if level % 2:
            print(level, 'HYPHENATING patternset, num patterns:', len(layer))
        else:
            print(level, 'INHIBITING patternset, num patterns:', len(layer))
        trained_with = (layer.patlen_range, layer.selector)
        print('\tTrained with: range %r, selector %r' % trained_with)

    print()
    return 0
Example #7
0
def main_compact(args):
    """Compact a project's pattern set and print per-level pattern counts.

    Counts the patterns of every layer before and after calling
    ``project.patternset.compact()``, prints both numbers per level, and
    saves the project only when ``args.commit`` is truthy. Returns 0.
    """
    project_path = args.project
    print('Compacting hyphenation patterns for', project_path)
    project = Project.load(project_path)

    def pattern_counts():
        # One count per layer, in layer order.
        return [layer.compute_num_patterns() for layer in project.patternset]

    counts_before = pattern_counts()
    project.patternset.compact()
    counts_after = pattern_counts()

    print('Result:')
    paired = zip(counts_before, counts_after)
    for level, (before, after) in enumerate(paired, start=1):
        print('\tLevel %s: %6d => %6d' % (level, before, after))

    if not args.commit:
        print('...Project NOT changed (use --commit flag to save changes)')
    else:
        project.save(project_path)
        print('...Committed')

    print()
    return 0
Example #8
0
def main_test(args):
    """Evaluate a project against a dictionary and optionally dump its errors.

    Runs ``do_test`` for the project at ``args.project`` on the dictionary at
    ``args.dictionary``. When ``args.errors`` is set, every entry yielded by
    ``project.patternset.errors(...)`` is written there via
    ``format_dictionary_word``; when ``args.patterns`` is set, the same
    entries are written there via ``format_word_as_pattern``. Both files are
    UTF-8. Returns 0.
    """
    project_path, dictionary_path = args.project, args.dictionary
    print('Testing', project_path, 'on dictionary', dictionary_path)
    project = Project.load(project_path)

    dictionary = Dictionary.load(dictionary_path)

    print('Performance of', project_path, 'on', dictionary_path)
    do_test(project, dictionary)

    errors_path = args.errors
    if errors_path:
        with codecs.open(errors_path, 'w', 'utf-8') as sink:
            found = project.patternset.errors(dictionary, project.margins)
            for word, hyphens, missed, false in found:
                entry = format_dictionary_word(word, hyphens, missed, false)
                sink.write(entry + '\n')
        print('Saved errors to', errors_path)

    patterns_path = args.patterns
    if patterns_path:
        with codecs.open(patterns_path, 'w', 'utf-8') as sink:
            # The errors are enumerated afresh for this second output file.
            found = project.patternset.errors(dictionary, project.margins)
            for word, hyphens, missed, false in found:
                sink.write(format_word_as_pattern(word, missed, false) + '\n')
        print('Saved errors to', patterns_path)

    print()
    return 0
Example #9
0
def main_explain(args):
    """Hyphenate each input word and write an explanation of the result.

    Words are read line by line (UTF-8) from ``args.input``, or from the
    stdin file descriptor when ``args.input`` is falsy; blank lines are
    skipped. For every word three things are written to ``args.output`` (or
    the stdout descriptor): the formatted hyphenation, the same text
    unicode-escaped, and ``Explain.format()`` followed by a blank line.
    Returns 0.

    NOTE(review): when the std descriptors are used, leaving the ``with``
    block closes them before the final ``print()`` runs - confirm that this
    is acceptable for callers.
    """
    print('Explaining hyphenation of', args.input, 'into', args.output,
          'using project', args.project)

    project = Project.load(args.project)

    with codecs.open(args.input or sys.stdin.fileno(), 'r', 'utf-8') as reader, \
            codecs.open(args.output or sys.stdout.fileno(), 'w', 'utf-8') as writer:

        for raw_line in reader:
            word = raw_line.strip()
            if word:
                explain = Explain()

                prediction = project.patternset.hyphenate_explain(
                    word, margins=project.margins, explain=explain)

                text = format_dictionary_word(word, prediction)
                writer.write(text + '\n')
                # ASCII-only rendering of the same line.
                escaped = text.encode('unicode-escape').decode('ascii')
                writer.write(escaped + '\n')
                writer.write(explain.format() + '\n\n')

    print()
    return 0
Example #10
0
def main_import(args):
    """Load TeX-style patterns from ``args.input`` into an empty project.

    Only the lines between the patterns-block opener and its closing brace
    are parsed; everything after a ``%`` on a line is ignored. One layer is
    appended per pattern level found, the pattern controls are installed, the
    project is re-tested against its own dictionary, and it is saved only
    when ``args.commit`` is truthy.

    Returns 0 on success, or -1 (after printing an error) when the project
    already contains pattern layers.
    """
    print('Loading patterns from', args.input, 'into project', args.project)
    project = Project.load(args.project)

    if len(project.patternset) > 0:
        print(
            'ERROR: project already has some patterns. Can only load into empty project!'
        )
        return -1

    patterns = {}
    entered = False
    with codecs.open(args.input, 'r', 'utf-8') as f:
        for line in f:
            # Remove the comment first and trim afterwards. Trimming before
            # the split left whitespace in front of a trailing comment, so a
            # line such as "} % end" never matched the closing brace.
            line = line.split('%')[0].strip()
            if not line:
                continue

            if line == '\\patterns{':
                entered = True

            elif entered and line == '}':
                break

            elif entered:
                text, control = PatternSet.parse_pattern(line)
                patterns[text] = control

    if patterns:
        maxlevel = 0
        for control in patterns.values():
            for level in control.values():
                maxlevel = max(maxlevel, level)

        patlen = max(len(text) for text in patterns.keys())

        for i in range(maxlevel):
            # The original passed `i & 2 == 1`, which is always False because
            # `i & 2` can only be 0 or 2. Odd zero-based indices are the
            # alternating (inhibiting) layers.
            # NOTE(review): assumes Layer's third argument is the inhibiting
            # flag - confirm against Layer's signature.
            project.patternset.append(
                Layer(Range(1, patlen + 2), None, i & 1 == 1))

        for text, control in patterns.items():
            project.patternset.set_pattern_control(text, control)
    else:
        print('WARNING: patterns file is empty!')

    project.missed, project.false = do_test(project, project.dictionary)

    if args.commit:
        project.save(args.project)
        print('...Committed')
    else:
        print('...Project NOT changed (use --commit flag to save changes)')

    print()
    return 0
Example #11
0
def main_import(args):
    """Import TeX-style patterns from ``args.input`` into an empty project.

    Parses the lines found between the patterns-block opener and its closing
    brace (text after ``%`` on a line is dropped), appends one layer per
    pattern level, installs the pattern controls, re-tests the project on its
    own dictionary and saves it only when ``args.commit`` is truthy.

    Returns 0 on success, or -1 (after printing an error) when the project
    already has pattern layers.
    """
    print('Loading patterns from', args.input, 'into project', args.project)
    project = Project.load(args.project)

    if len(project.patternset) > 0:
        print('ERROR: project already has some patterns. Can only load into empty project!')
        return -1

    patterns = {}
    entered = False
    with codecs.open(args.input, 'r', 'utf-8') as f:
        for line in f:
            # Strip AFTER removing the comment: stripping first left trailing
            # whitespace behind on lines like "} % end", so the closing brace
            # was not recognised.
            line = line.split('%')[0].strip()
            if not line:
                continue

            if line == '\\patterns{':
                entered = True

            elif entered and line == '}':
                break

            elif entered:
                text, control = PatternSet.parse_pattern(line)
                patterns[text] = control

    if patterns:
        maxlevel = 0
        for control in patterns.values():
            for level in control.values():
                maxlevel = max(maxlevel, level)

        patlen = max(len(text) for text in patterns.keys())

        for i in range(maxlevel):
            # `i & 2 == 1` (the original test) can never be True, since
            # `i & 2` is either 0 or 2; test the low bit so that every second
            # layer is flagged.
            # NOTE(review): assumes Layer's third argument is the inhibiting
            # flag - confirm against Layer's signature.
            project.patternset.append(Layer(Range(1, patlen+2), None, i & 1 == 1))

        for text, control in patterns.items():
            project.patternset.set_pattern_control(text, control)
    else:
        print('WARNING: patterns file is empty!')

    project.missed, project.false = do_test(project, project.dictionary)

    if args.commit:
        project.save(args.project)
        print('...Committed')
    else:
        print('...Project NOT changed (use --commit flag to save changes)')

    print()
    return 0
Example #12
0
def main_new(args):
    """Build a project from a dictionary file, save it and show it.

    Returns -1 (after printing a message) if ``args.project`` already exists
    or ``args.dictionary`` does not. Margins are taken from ``args.margins``
    when provided, otherwise computed from the dictionary. On success the
    return value is that of ``main_show(args)``.
    """
    target, source = args.project, args.dictionary
    print('Creating new project', target, 'from dictionary', source)

    if os.path.exists(target):
        print('Project file already exists! Use different name or delete old project first. File %s' % target)
        return -1

    if not os.path.exists(source):
        print('Dictionary file not found', source)
        return -1

    dictionary = Dictionary.load(source)

    if args.margins is not None:
        margins = Margins.parse(args.margins)
    else:
        print('Automatically computing hyphenation margins from dictionary')
        margins = dictionary.compute_margins()

    new_project = Project(dictionary, margins)
    new_project.save(target)

    return main_show(args)
Example #13
0
def main_export(args):
    """Export a project's patterns and exceptions to a TeX file.

    Refuses to overwrite ``args.output`` (prints a message and returns -1).
    Otherwise writes a patterns block followed by a hyphenation block built
    from ``project.patternset.errors(...)``, prints the counts, and - when
    ``args.patterns`` / ``args.exceptions`` are set - also writes the raw
    pattern strings / exception words to those files. All files are UTF-8.
    Returns 0 on success.
    """
    print('Exporting patterns from', args.project,
          'and saving them in TeX format to', args.output)

    output_path = args.output
    if os.path.exists(output_path):
        print(
            'Pattern file already exists! Delete it first, or change the name. Pattern file: %s'
            % output_path)
        return -1

    project = Project.load(args.project)

    pattern_strings = list(project.patternset.pattern_strings())
    exceptions = list(
        project.patternset.errors(project.dictionary, project.margins))

    def exception_lines():
        # Lazily format each exception; only the word and its hyphens are used.
        for word, hyphens, _, _ in exceptions:
            yield format_dictionary_word(word, hyphens)

    with codecs.open(output_path, 'w', 'utf-8') as tex:
        tex.write('\\patterns{\n')
        for pattern in pattern_strings:
            tex.write(pattern + '\n')
        tex.write('}\n')
        tex.write('\\hyphenation{\n')
        for entry in exception_lines():
            tex.write(entry + '\n')
        tex.write('}\n')

    print('Created TeX patterns file', output_path)
    print('Number of patterns:', len(pattern_strings))
    print('Number of exceptions:', len(exceptions))

    patterns_path = args.patterns
    if patterns_path:
        print()
        with codecs.open(patterns_path, 'w', 'utf-8') as raw:
            for pattern in pattern_strings:
                raw.write(pattern + '\n')
        print('Written raw patterns to', patterns_path)

    exceptions_path = args.exceptions
    if exceptions_path:
        print()
        with codecs.open(exceptions_path, 'w', 'utf-8') as raw:
            for entry in exception_lines():
                raw.write(entry + '\n')
        print('Written raw exceptions to', exceptions_path)

    print()
    return 0
Example #14
0
def get_heuristic(b, g, r, t, hfunc):
    """Train one layer on the 'bds' project and score it with ``hfunc``.

    The layer is trained with ``Selector(g, b, t)`` and ``Range(1, r)``. The
    return value is ``hfunc(false, missed)``, using the project's ``false``
    and ``missed`` counters as they stand after training.
    """
    project = Project.load('bds')  # the training set
    dictionary_copy = project.dictionary.clone()

    selector = Selector(g, b, t)
    patlen_range = Range(1, r)

    # Generate the patterns for the given parameters.
    project.train_new_layer(patlen_range, selector)
    false_count = project.false
    missed_count = project.missed

    # The original author was unsure whether putting the cloned dictionary
    # back is needed; it is kept as-is.
    project.dictionary = dictionary_copy
    return hfunc(false_count, missed_count)
Example #15
0
def main_export(args):
    """Write the project's patterns and exceptions in TeX format.

    Returns -1 (after printing a message) when ``args.output`` already
    exists. Otherwise the output file receives a patterns block and a
    hyphenation block (the latter from ``project.patternset.errors(...)``);
    ``args.patterns`` and ``args.exceptions``, when set, receive the raw
    pattern strings and exception words. All files are UTF-8. Returns 0 on
    success.
    """
    print('Exporting patterns from', args.project, 'and saving them in TeX format to', args.output)

    destination = args.output
    if os.path.exists(destination):
        print('Pattern file already exists! Delete it first, or change the name. Pattern file: %s' % destination)
        return -1

    project = Project.load(args.project)

    pattern_strings = list(project.patternset.pattern_strings())
    exceptions = list(project.patternset.errors(project.dictionary, project.margins))

    def formatted_exceptions():
        # Yields one formatted word per exception, ignoring the error details.
        for word, hyphens, _, _ in exceptions:
            yield format_dictionary_word(word, hyphens)

    with codecs.open(destination, 'w', 'utf-8') as out:
        out.write('\\patterns{\n')
        for patt in pattern_strings:
            out.write(patt + '\n')
        out.write('}\n')
        out.write('\\hyphenation{\n')
        for text in formatted_exceptions():
            out.write(text + '\n')
        out.write('}\n')

    print('Created TeX patterns file', destination)
    print('Number of patterns:', len(pattern_strings))
    print('Number of exceptions:', len(exceptions))

    raw_patterns_path = args.patterns
    if raw_patterns_path:
        print()
        with codecs.open(raw_patterns_path, 'w', 'utf-8') as out:
            for patt in pattern_strings:
                out.write(patt + '\n')
        print('Written raw patterns to', raw_patterns_path)

    raw_exceptions_path = args.exceptions
    if raw_exceptions_path:
        print()
        with codecs.open(raw_exceptions_path, 'w', 'utf-8') as out:
            for text in formatted_exceptions():
                out.write(text + '\n')
        print('Written raw exceptions to', raw_exceptions_path)

    print()
    return 0
Example #16
0
def optimise_level_driver(b, g, r, t, hfunc_o, hfunc_e):
    """Optimise and commit nine successive pattern levels on the 'bds' project.

    For levels 1..9 the (b, g, r) triple is re-optimised with
    ``optimise_level.optimise_level`` - using ``hfunc_o`` on odd levels and
    ``hfunc_e`` on even levels - then a layer is trained with
    ``Selector(g, b, t)`` / ``Range(1, r)`` on a freshly loaded project and
    committed.

    Returns a dict mapping each level number to the (b, g, r) chosen for it.
    """
    level_bgr_map = {}
    # Start with an odd level (1). Built-in range replaces the Python 2-only
    # xrange, so the function runs on both Python 2 and 3.
    for level in range(1, 10):
        hfunc = hfunc_e if level % 2 == 0 else hfunc_o
        b, g, r = optimise_level.optimise_level(b, g, r, t, hfunc)

        print(b, g, r)
        level_bgr_map[level] = (b, g, r)

        project = Project.load('bds')
        selector = Selector(g, b, t)
        # Named so that it no longer shadows the built-in range.
        patlen_range = Range(1, r)
        layer = project.train_new_layer(patlen_range, selector)
        project.commit(layer)
        # Was a Python 2 print statement (a syntax error on Python 3);
        # message typo "Commiting" fixed as well.
        print("Committing Layer")
    return level_bgr_map
Example #17
0
def main_hyphenate(args):
    """Hyphenate every word of the input with the project's patterns.

    Words are read line by line (UTF-8) from ``args.input``, or from the
    stdin file descriptor when ``args.input`` is falsy; blank lines are
    skipped. Each formatted result is written on its own line to
    ``args.output`` (or the stdout descriptor). Returns 0.

    NOTE(review): when the std descriptors are used, leaving the ``with``
    block closes them before the final ``print()`` runs - confirm that this
    is acceptable for callers.
    """
    print('Hyphenating', args.input, 'into', args.output, 'using project', args.project)

    project = Project.load(args.project)

    with codecs.open(args.input or sys.stdin.fileno(), 'r', 'utf-8') as reader, \
            codecs.open(args.output or sys.stdout.fileno(), 'w', 'utf-8') as writer:

        for raw_line in reader:
            word = raw_line.strip()
            if word:
                prediction = project.patternset.hyphenate(word, margins=project.margins)
                writer.write(format_dictionary_word(word, prediction) + '\n')

    print()
    return 0
Example #18
0
    def test(self):
        """Train two layers on a small dictionary and pin the resulting patterns."""
        words = Dictionary.from_string('''
            lo-rem
            ip-sum
            do-l-or
            sit
            a-met
            con-sec-te-tur
            adi-pis-cing
            elit
            ves-ti-bu-l-um
            eu-is-mod
            di-am
            eg-et
            bi-b-en-d-um
            ''')

        project = Project(words)

        patlen_range = Range.parse('1-2')
        selector = Selector.parse('1:1:1')

        # Each round trains one more layer with the same settings:
        # (expected layer count, expected false count). Missed stays at 0.
        for expected_layers, expected_false in ((1, 3), (2, 0)):
            project.train_new_layer(patlen_range, selector)

            self.assertEqual(expected_layers, len(project.patternset))

            self.assertEqual(project.missed, 0)
            self.assertEqual(project.false, expected_false)

        expected_patterns = (
            '.e2 1a 1a1m 1b 1b1e bi1 1bu1 1ci co2 '
            'c1t di1 do1 1d1u ec1 eg1 e2l 1en1 es1 '
            'e1t eu1 g1 g1e i1a i1b is1 1l 2li '
            '1lo1 1l1u 1m 1me 1mo 2n1 n1d1 2n1s o1 '
            'o1l1 o2n1 o1r 1pi p1s2 1r 1re s1c 1se '
            's1m s1t 1s2u 1t 1te1 1ti1 1tu u1 u1i '
            'u1l1 1um'
        ).split()

        patterns = list(project.patternset.pattern_strings())
        self.assertEqual(patterns, expected_patterns)
Example #19
0
def main_compact(args):
    """Compact the pattern set of a project and show the effect per level.

    Pattern counts are taken for every layer before and after
    ``project.patternset.compact()`` and printed side by side. The project is
    saved only when ``args.commit`` is truthy. Returns 0.
    """
    path = args.project
    print('Compacting hyphenation patterns for', path)
    project = Project.load(path)

    def count_per_layer():
        # Pattern count of each layer, in layer order.
        return [layer.compute_num_patterns() for layer in project.patternset]

    before_counts = count_per_layer()
    project.patternset.compact()
    after_counts = count_per_layer()

    print('Result:')
    level = 0
    for before, after in zip(before_counts, after_counts):
        level += 1
        print('\tLevel %s: %6d => %6d' % (level, before, after))

    if not args.commit:
        print('...Project NOT changed (use --commit flag to save changes)')
    else:
        project.save(path)
        print('...Committed')

    print()
    return 0
Example #20
0
def main_hyphenate(args):
    """Write the hyphenation of each input word to the output.

    Input lines (UTF-8) come from ``args.input`` or, when that is falsy, from
    the stdin file descriptor; blank lines are ignored. One formatted word
    per line goes to ``args.output`` or the stdout descriptor. Returns 0.

    NOTE(review): when the std descriptors are used, leaving the ``with``
    block closes them before the final ``print()`` runs - confirm that this
    is acceptable for callers.
    """
    print('Hyphenating', args.input, 'into', args.output, 'using project',
          args.project)

    project = Project.load(args.project)

    with codecs.open(args.input or sys.stdin.fileno(), 'r', 'utf-8') as src, \
            codecs.open(args.output or sys.stdout.fileno(), 'w', 'utf-8') as dst:

        for raw_line in src:
            word = raw_line.strip()
            if word:
                prediction = project.patternset.hyphenate(
                    word, margins=project.margins)
                formatted = format_dictionary_word(word, prediction)
                dst.write(formatted + '\n')

    print()
    return 0
Example #21
0
def main_test(args):
    """Test a project on a dictionary; optionally save the errors found.

    ``do_test`` is run for the project at ``args.project`` against the
    dictionary at ``args.dictionary``. If ``args.errors`` is set, the entries
    from ``project.patternset.errors(...)`` are written there with
    ``format_dictionary_word``; if ``args.patterns`` is set they are written
    there with ``format_word_as_pattern``. Files are UTF-8. Returns 0.
    """
    project_path, dictionary_path = args.project, args.dictionary
    print('Testing', project_path, 'on dictionary', dictionary_path)
    project = Project.load(project_path)

    dictionary = Dictionary.load(dictionary_path)

    print('Performance of', project_path, 'on', dictionary_path)
    do_test(project, dictionary)

    errors_path = args.errors
    if errors_path:
        with codecs.open(errors_path, 'w', 'utf-8') as handle:
            failures = project.patternset.errors(dictionary, project.margins)
            for word, hyphens, missed, false in failures:
                handle.write(format_dictionary_word(word, hyphens, missed, false) + '\n')
        print('Saved errors to', errors_path)

    patterns_path = args.patterns
    if patterns_path:
        with codecs.open(patterns_path, 'w', 'utf-8') as handle:
            # Enumerated again for the second file, as in the first branch.
            failures = project.patternset.errors(dictionary, project.margins)
            for word, hyphens, missed, false in failures:
                handle.write(format_word_as_pattern(word, missed, false) + '\n')
        print('Saved errors to', patterns_path)

    print()
    return 0
Example #22
0
def main_train(args):
    """Train one additional pattern layer and report the project's errors.

    Loads the project at ``args.project``, parses ``args.range`` and
    ``args.selector`` and trains a new layer with them. Missed is reported as
    a share of the hyphen total read before training, false as a share of
    ``project.total_nonhyphens``. With ``args.commit`` the layer is committed
    and the project saved; otherwise nothing is written. Returns 0.
    """
    print('Training project', args.project, 'using range', args.range,
          'and selector', args.selector)

    project = Project.load(args.project)

    # Layer kinds alternate: an odd number of existing layers means the layer
    # about to be trained sits on an even (inhibiting) level.
    next_level = len(project.patternset) + 1
    if next_level % 2 == 0:
        # Fixed typo in the banner (was 'INHIBINTING').
        print('Training INHIBITING pattern layer (level=%s)' % next_level)
    else:
        print('Training HYPHENATION pattern layer (level=%s)' % next_level)

    patlen_rng = Range.parse(args.range)
    selector = Selector.parse(args.selector)

    print('\tpattern lengths:', patlen_rng)
    print('\tselector:', args.selector)

    # Captured before training, exactly as the original did.
    total_hyphens = project.total_hyphens

    layer = project.train_new_layer(patlen_rng, selector)

    missed, false = project.missed, project.false

    print('Missed (weighted):', missed, percent(missed, total_hyphens))
    print('False (weighted):', false, percent(false, project.total_nonhyphens))

    if args.commit:
        project.commit(layer)
        project.save(args.project)
        print('...Committed!')
    else:
        print('...Projects NOT changed (use --commit flag to save changes)')

    print()
    return 0
Example #23
0
def main_explain(args):
    """Explain how each input word gets hyphenated.

    Reads words line by line (UTF-8) from ``args.input`` or, when that is
    falsy, from the stdin file descriptor, skipping blank lines. For each
    word the output (``args.output`` or the stdout descriptor) receives the
    formatted hyphenation, its unicode-escaped form, and the text from
    ``Explain.format()`` followed by a blank line. Returns 0.

    NOTE(review): when the std descriptors are used, leaving the ``with``
    block closes them before the final ``print()`` runs - confirm that this
    is acceptable for callers.
    """
    print('Explaining hyphenation of', args.input, 'into', args.output, 'using project', args.project)

    project = Project.load(args.project)

    with codecs.open(args.input or sys.stdin.fileno(), 'r', 'utf-8') as src, \
            codecs.open(args.output or sys.stdout.fileno(), 'w', 'utf-8') as dst:

        for raw_line in src:
            word = raw_line.strip()
            if word:
                explain = Explain()

                prediction = project.patternset.hyphenate_explain(word, margins=project.margins, explain=explain)

                formatted = format_dictionary_word(word, prediction)
                dst.write(formatted + '\n')
                # Same line again with non-ASCII characters escaped.
                dst.write(formatted.encode('unicode-escape').decode('ascii') + '\n')
                dst.write(explain.format() + '\n\n')

    print()
    return 0
Example #24
0
from patgen.project import Project
from patgen.range import Range
from patgen.selector import Selector

import csv
import pprint

# Parameter sweep: for each first-level (range, good, bad) setting, train a
# layer, then try each second-level setting on top of it and record the
# pattern count and the missed/false counters in bds.csv.
p = Project.load('bds')

# The `with` block closes bds.csv when the sweep ends or fails; the original
# never closed the handle. Built-in range replaces the Python 2-only xrange.
with open('bds.csv', 'w') as csv_file:
    csvw = csv.writer(csv_file)

    for r1 in range(3, 4):
        for g1 in range(1, 5):
            for b1 in range(1, 5):
                rg1 = Range(1, r1)
                s1 = Selector(g1, b1, 10)
                # NOTE(review): d1 is never restored and the first-level
                # layer is never popped, so layers appear to accumulate
                # across outer iterations - confirm whether a pop/restore is
                # missing after the inner loops.
                d1 = p.dictionary.clone()
                p.train_new_layer(rg1, s1)

                for r2 in range(3, 4):
                    for g2 in range(1, 5):
                        for b2 in range(1, 5):
                            rg2 = Range(1, r2)
                            s2 = Selector(g2, b2, 10)
                            d2 = p.dictionary.clone()
                            p.train_new_layer(rg2, s2)
                            num_patterns = sum(l.compute_num_patterns()
                                               for l in p.patternset)
                            csvw.writerow((r1, g1, b1, r2, g2, b2,
                                           num_patterns, p.missed, p.false))
                            # Undo the second-level layer before the next try.
                            p.patternset.pop()
                            p.dictionary = d2
Example #25
0
    def test(self):
        """Train two layers on a small dictionary and check the exact patterns."""
        sample = Dictionary.from_string('''
            lo-rem
            ip-sum
            do-l-or
            sit
            a-met
            con-sec-te-tur
            adi-pis-cing
            elit
            ves-ti-bu-l-um
            eu-is-mod
            di-am
            eg-et
            bi-b-en-d-um
            ''')

        project = Project(sample)

        patlen_range = Range.parse('1-2')
        selector = Selector.parse('1:1:1')

        # Two training rounds with identical settings. Each tuple holds the
        # layer count and false count expected afterwards; missed stays 0.
        for layers_after, false_after in ((1, 3), (2, 0)):
            project.train_new_layer(patlen_range, selector)

            self.assertEqual(layers_after, len(project.patternset))

            self.assertEqual(project.missed, 0)
            self.assertEqual(project.false, false_after)

        expected = [
            '.e2', '1a', '1a1m', '1b', '1b1e', 'bi1', '1bu1', '1ci',
            'co2', 'c1t', 'di1', 'do1', '1d1u', 'ec1', 'eg1', 'e2l',
            '1en1', 'es1', 'e1t', 'eu1', 'g1', 'g1e', 'i1a', 'i1b',
            'is1', '1l', '2li', '1lo1', '1l1u', '1m', '1me', '1mo',
            '2n1', 'n1d1', '2n1s', 'o1', 'o1l1', 'o2n1', 'o1r', '1pi',
            'p1s2', '1r', '1re', 's1c', '1se', 's1m', 's1t', '1s2u',
            '1t', '1te1', '1ti1', '1tu', 'u1', 'u1i', 'u1l1', '1um',
        ]

        patterns = list(project.patternset.pattern_strings())
        self.assertEqual(patterns, expected)