def main_swap(args):
    """Exchange the odd-indexed (inhibiting) layers of two projects.

    Loads the projects named by ``args.project`` and ``args.project2``,
    swaps every layer at an odd zero-based index between their pattern
    sets, then re-runs ``do_test`` on each project against its own
    dictionary and stores the result in ``missed`` / ``false``.  The
    projects are written back only when ``args.commit`` is truthy.

    Raises:
        ValueError: if the two pattern sets differ in length.

    Returns:
        0 on completion.
    """
    first_path, second_path = args.project, args.project2
    print('Swapping inhibiting layers between', first_path, 'and', second_path)

    first = Project.load(first_path)
    second = Project.load(second_path)

    num_layers = len(first.patternset)
    if num_layers != len(second.patternset):
        raise ValueError('You can only swap layers between projects with same number of layers!')

    # Odd zero-based positions only: 1, 3, 5, ...
    for index in range(1, num_layers, 2):
        from_second = second.patternset[index]
        from_first = first.patternset[index]
        first.patternset[index] = from_second
        second.patternset[index] = from_first

    for path, proj in ((first_path, first), (second_path, second)):
        print('Performance of', path)
        proj.missed, proj.false = do_test(proj, proj.dictionary)

    if not args.commit:
        print('...Projects NOT changed (use --commit flag to save changes)')
    else:
        first.save(first_path)
        second.save(second_path)
        print('...Committed')

    print()
    return 0
def main_swap(args):
    """Swap the inhibiting layers between two saved projects.

    Both projects (``args.project`` and ``args.project2``) are loaded and
    must hold the same number of layers.  Layers at odd zero-based
    positions are exchanged, each project is re-evaluated with
    ``do_test`` on its own dictionary, and the new ``missed`` / ``false``
    figures are stored on the project.  Nothing is saved unless
    ``args.commit`` is set.

    Raises:
        ValueError: when the projects have a different number of layers.

    Returns:
        0 on completion.
    """
    print('Swapping inhibiting layers between', args.project, 'and', args.project2)

    left = Project.load(args.project)
    right = Project.load(args.project2)

    if len(left.patternset) != len(right.patternset):
        raise ValueError('You can only swap layers between projects with same number of layers!')

    odd_positions = range(1, len(left.patternset), 2)
    for pos in odd_positions:
        swapped = (right.patternset[pos], left.patternset[pos])
        left.patternset[pos] = swapped[0]
        right.patternset[pos] = swapped[1]

    print('Performance of', args.project)
    left_result = do_test(left, left.dictionary)
    left.missed, left.false = left_result

    print('Performance of', args.project2)
    right_result = do_test(right, right.dictionary)
    right.missed, right.false = right_result

    if args.commit:
        left.save(args.project)
        right.save(args.project2)
        outcome = '...Committed'
    else:
        outcome = '...Projects NOT changed (use --commit flag to save changes)'
    print(outcome)

    print()
    return 0
def main_new(args):
    """Create a new project file from a dictionary file.

    Refuses to proceed (returning -1 after printing a message) when the
    project path already exists or the dictionary path does not.  The
    margins come from ``args.margins`` when given, otherwise they are
    computed from the dictionary.  After saving, the result of
    ``main_show(args)`` is returned.
    """
    project_path = args.project
    dictionary_path = args.dictionary
    print('Creating new project', project_path, 'from dictionary', dictionary_path)

    # Guard clauses: never overwrite an existing project, never start
    # without a dictionary.
    if os.path.exists(project_path):
        print('Project file already exists! Use different name or delete old project first. File %s' % project_path)
        return -1
    if not os.path.exists(dictionary_path):
        print('Dictionary file not found', dictionary_path)
        return -1

    dictionary = Dictionary.load(dictionary_path)

    if args.margins is not None:
        margins = Margins.parse(args.margins)
    else:
        print('Automatically computing hyphenation margins from dictionary')
        margins = dictionary.compute_margins()

    Project(dictionary, margins).save(project_path)
    return main_show(args)
def main_show(args):
    """Print a summary of the project stored at ``args.project``.

    Returns -1 (after printing a message) when the file does not exist.
    Otherwise prints creation/modification info, margins, dictionary
    size, hyphen statistics and one entry per pattern layer, and
    returns 0.
    """
    path = args.project
    if not os.path.exists(path):
        print('Project file not found:', path)
        return -1

    project = Project.load(path)

    print('Project file', path)
    print('\tcreated:', project.created)
    print('\tlast modified:', project.modified)
    print('\tmargins:', project.margins)
    print('\tdictionary size:', len(project.dictionary.keys()))
    # (Reporting of an "ignore weights" flag is disabled in this version.)

    total = project.total_hyphens
    print('\ttotal hyphens: (weighted)', total)
    # Both error figures are reported relative to the total hyphen count.
    print('\ttotal missed : (weighted)', project.missed, percent(project.missed, total))
    print('\ttotal false : (weighted)', project.false, percent(project.false, total))

    print('\tnumber of pattern levels:', len(project.patternset))
    for level, layer in enumerate(project.patternset, 1):
        # Levels are numbered from 1: odd levels hyphenate, even levels inhibit.
        if level % 2 == 0:
            print(level, 'INHIBITING patternset, num patterns:', len(layer))
        else:
            print(level, 'HYPHENATING patternset, num patterns:', len(layer))
        print('\tTrained with: range %r, selector %r' % (layer.patlen_range, layer.selector))

    print()
    return 0
def main_train(args):
    """Train one more pattern layer on the project at ``args.project``.

    The pattern-length range and the selector are parsed from
    ``args.range`` and ``args.selector``.  After training, the project's
    ``missed`` / ``false`` figures are printed relative to the hyphen
    total read before training.  The project is saved only when
    ``args.commit`` is truthy.

    Returns:
        0 on completion.
    """
    print('Training project', args.project, 'using range', args.range, 'and selector', args.selector)
    project = Project.load(args.project)

    existing_levels = len(project.patternset)
    # An odd number of existing layers means the next one is inhibiting.
    if existing_levels & 1:
        banner = 'Training INHIBINTING pattern layer (level=%s)'
    else:
        banner = 'Training HYPHENATION pattern layer (level=%s)'
    print(banner % (existing_levels + 1))

    length_range = Range.parse(args.range)
    chosen_selector = Selector.parse(args.selector)
    print('\tpattern lengths:', length_range)
    print('\tselector:', args.selector)

    # Read the total before training starts.
    hyphen_total = project.total_hyphens
    project.train_new_layer(length_range, chosen_selector)

    missed_count, false_count = project.missed, project.false
    print('Missed (weighted):', missed_count, percent(missed_count, hyphen_total))
    print('False (weighted):', false_count, percent(false_count, hyphen_total))

    if not args.commit:
        print('...Projects NOT changed (use --commit flag to save changes)')
    else:
        project.save(args.project)
        print('...Committed!')

    print()
    return 0
def main_show(args):
    """Print a report about the project file named by ``args.project``.

    A missing file yields a message and a return value of -1.  For an
    existing project the report lists timestamps, margins, dictionary
    size, hyphen / non-hyphen totals, missed and false counts with their
    percentages, and a two-line entry for each pattern layer.  Returns 0
    after printing.
    """
    if not os.path.exists(args.project):
        print('Project file not found:', args.project)
        return -1

    project = Project.load(args.project)

    header_rows = (
        ('Project file', args.project),
        ('\tcreated:', project.created),
        ('\tlast modified:', project.modified),
        ('\tmargins:', project.margins),
        ('\tdictionary size:', len(project.dictionary.keys())),
    )
    for caption, value in header_rows:
        print(caption, value)
    # (Reporting of an "ignore weights" flag is disabled in this version.)

    print('\ttotal hyphens: (weighted)', project.total_hyphens)
    print('\ttotal non-hyphens: ', project.total_nonhyphens)
    # Missed is relative to hyphens, false is relative to non-hyphens.
    missed_share = percent(project.missed, project.total_hyphens)
    print('\ttotal missed : (weighted)', project.missed, missed_share)
    false_share = percent(project.false, project.total_nonhyphens)
    print('\ttotal false : (weighted)', project.false, false_share)

    layers = project.patternset
    print('\tnumber of pattern levels:', len(layers))
    for index, layer in enumerate(layers):
        number = index + 1
        # Even zero-based indices hold hyphenating layers, odd ones inhibiting.
        if index & 1:
            print(number, 'INHIBITING patternset, num patterns:', len(layer))
        else:
            print(number, 'HYPHENATING patternset, num patterns:', len(layer))
        training_info = (layer.patlen_range, layer.selector)
        print('\tTrained with: range %r, selector %r' % training_info)

    print()
    return 0
def main_compact(args):
    """Compact the pattern set of a project and report per-level counts.

    The number of patterns in every layer is measured before and after
    ``patternset.compact()`` and printed side by side.  The project is
    saved only when ``args.commit`` is truthy.

    Returns:
        0 on completion.
    """
    print('Compacting hyphenation patterns for', args.project)
    project = Project.load(args.project)

    def count_per_layer():
        # Snapshot of the pattern count of each layer, in layer order.
        return [layer.compute_num_patterns() for layer in project.patternset]

    counts_before = count_per_layer()
    project.patternset.compact()
    counts_after = count_per_layer()

    print('Result:')
    paired = zip(counts_before, counts_after)
    for level, (before, after) in enumerate(paired, 1):
        print('\tLevel %s: %6d => %6d' % (level, before, after))

    if not args.commit:
        print('...Project NOT changed (use --commit flag to save changes)')
    else:
        project.save(args.project)
        print('...Committed')

    print()
    return 0
def main_test(args):
    """Evaluate a project against a dictionary and optionally dump errors.

    Runs ``do_test`` for the project at ``args.project`` on the dictionary
    at ``args.dictionary``.  When ``args.errors`` is set, every error
    entry is written to that file via ``format_dictionary_word``; when
    ``args.patterns`` is set, every error entry is written to that file
    via ``format_word_as_pattern``.

    Returns:
        0 on completion.
    """
    print('Testing', args.project, 'on dictionary', args.dictionary)
    project = Project.load(args.project)
    dictionary = Dictionary.load(args.dictionary)

    print('Performance of', args.project, 'on', args.dictionary)
    do_test(project, dictionary)

    def dump_errors(path, render):
        # The error list is recomputed for each report that is written.
        with codecs.open(path, 'w', 'utf-8') as sink:
            for word, hyphens, missed, false in project.patternset.errors(dictionary, project.margins):
                sink.write(render(word, hyphens, missed, false) + '\n')
        print('Saved errors to', path)

    if args.errors:
        dump_errors(args.errors, format_dictionary_word)

    if args.patterns:
        # The pattern form does not use the reference hyphens.
        dump_errors(
            args.patterns,
            lambda word, hyphens, missed, false: format_word_as_pattern(word, missed, false),
        )

    print()
    return 0
def main_explain(args):
    """Hyphenate each input word and write an explanation for it.

    Words are read one per line from ``args.input`` (or the stdin file
    descriptor when it is empty) and results go to ``args.output`` (or
    the stdout file descriptor).  Blank lines are skipped.  For each word
    three items are written: the formatted prediction, the same text
    escaped with ``unicode-escape``, and ``Explain.format()`` followed by
    an empty line.

    Returns:
        0 on completion.
    """
    print('Explaining hyphenation of', args.input, 'into', args.output, 'using project', args.project)
    project = Project.load(args.project)

    with codecs.open(args.input or sys.stdin.fileno(), 'r', 'utf-8') as reader:
        with codecs.open(args.output or sys.stdout.fileno(), 'w', 'utf-8') as writer:
            stripped = (raw.strip() for raw in reader)
            for word in (w for w in stripped if w):
                trace = Explain()
                prediction = project.patternset.hyphenate_explain(
                    word,
                    margins=project.margins,
                    explain=trace,
                )
                rendered = format_dictionary_word(word, prediction)
                ascii_rendered = rendered.encode('unicode-escape').decode('ascii')
                writer.write(rendered + '\n')
                writer.write(ascii_rendered + '\n')
                writer.write(trace.format() + '\n\n')

    print()
    return 0
def main_import(args):
    """Load TeX-style patterns from ``args.input`` into an empty project.

    Only the lines between a ``\\patterns{`` line and the closing ``}``
    line are read; anything after a ``%`` on a line is treated as a
    comment.  One layer is appended per level found in the patterns (up
    to the highest level), then every pattern is registered with
    ``set_pattern_control``.  The project is re-tested against its own
    dictionary and saved only when ``args.commit`` is truthy.

    Returns:
        -1 if the project already contains layers, otherwise 0.
    """
    print('Loading patterns from', args.input, 'into project', args.project)
    project = Project.load(args.project)
    if len(project.patternset) > 0:
        print('ERROR: project already has some patterns. Can only load into empty project!')
        return -1

    patterns = {}
    entered = False
    with codecs.open(args.input, 'r', 'utf-8') as f:
        for line in f:
            # Cut the '%' comment first and strip afterwards, so whitespace
            # left in front of a comment cannot defeat the exact matches
            # below or leak into a pattern.
            line = line.split('%')[0].strip()
            if not line:
                continue
            if line == '\\patterns{':
                entered = True
            elif entered and line == '}':
                break
            elif entered:
                text, control = PatternSet.parse_pattern(line)
                patterns[text] = control

    if patterns:
        maxlevel = 0
        for control in patterns.values():
            for level in control.values():
                maxlevel = max(maxlevel, level)
        patlen = max(len(text) for text in patterns)
        for i in range(maxlevel):
            # Odd zero-based indices are inhibiting layers (same parity
            # convention as main_show).  The previous test `i & 2 == 1`
            # parsed as `(i & 2) == 1` and was therefore always False.
            # NOTE(review): assumes Layer's third positional argument is the
            # inhibiting flag -- confirm against the Layer constructor.
            inhibiting = i % 2 == 1
            project.patternset.append(Layer(Range(1, patlen + 2), None, inhibiting))
        for text, control in patterns.items():
            project.patternset.set_pattern_control(text, control)
    else:
        print('WARNING: patterns file is empty!')

    project.missed, project.false = do_test(project, project.dictionary)

    if args.commit:
        project.save(args.project)
        print('...Committed')
    else:
        print('...Project NOT changed (use --commit flag to save changes)')
    print()
    return 0
def main_import(args):
    """Import a TeX ``\\patterns{ ... }`` block into a project with no layers.

    Lines are read from ``args.input``; text after ``%`` is discarded as
    a comment and only lines between ``\\patterns{`` and ``}`` are parsed
    with ``PatternSet.parse_pattern``.  As many layers as the highest
    level seen are appended to the project's pattern set and each pattern
    is stored with ``set_pattern_control``.  Afterwards the project is
    tested on its own dictionary; it is saved only with ``args.commit``.

    Returns:
        -1 if the project already contains layers, otherwise 0.
    """
    print('Loading patterns from', args.input, 'into project', args.project)
    project = Project.load(args.project)
    if len(project.patternset) > 0:
        print('ERROR: project already has some patterns. Can only load into empty project!')
        return -1

    patterns = {}
    entered = False
    with codecs.open(args.input, 'r', 'utf-8') as f:
        for line in f:
            # Remove the comment part, then strip: otherwise a line such as
            # "\patterns{ % comment" keeps a trailing blank and no longer
            # equals the marker it is compared against.
            line = line.split('%')[0].strip()
            if not line:
                continue
            if line == '\\patterns{':
                entered = True
            elif entered and line == '}':
                break
            elif entered:
                text, control = PatternSet.parse_pattern(line)
                patterns[text] = control

    if patterns:
        maxlevel = 0
        for control in patterns.values():
            for level in control.values():
                maxlevel = max(maxlevel, level)
        patlen = max(len(text) for text in patterns)
        for i in range(maxlevel):
            # `i & 2 == 1` is `(i & 2) == 1`, which can never be true, so no
            # layer was ever flagged.  Use a parity test: odd zero-based
            # indices are the inhibiting layers (as reported by main_show).
            # NOTE(review): assumes Layer's third positional argument is the
            # inhibiting flag -- confirm against the Layer constructor.
            is_inhibiting = (i & 1) == 1
            project.patternset.append(Layer(Range(1, patlen + 2), None, is_inhibiting))
        for text, control in patterns.items():
            project.patternset.set_pattern_control(text, control)
    else:
        print('WARNING: patterns file is empty!')

    project.missed, project.false = do_test(project, project.dictionary)

    if args.commit:
        project.save(args.project)
        print('...Committed')
    else:
        print('...Project NOT changed (use --commit flag to save changes)')
    print()
    return 0
def main_new(args):
    """Build and save a fresh project from the dictionary at ``args.dictionary``.

    Prints a message and returns -1 if ``args.project`` already exists or
    if ``args.dictionary`` is missing.  Margins are parsed from
    ``args.margins`` unless it is ``None``, in which case the dictionary
    computes them.  The return value is whatever ``main_show(args)``
    returns for the newly saved project.
    """
    print('Creating new project', args.project, 'from dictionary', args.dictionary)

    project_exists = os.path.exists(args.project)
    if project_exists:
        message = 'Project file already exists! Use different name or delete old project first. File %s'
        print(message % args.project)
        return -1

    dictionary_exists = os.path.exists(args.dictionary)
    if not dictionary_exists:
        print('Dictionary file not found', args.dictionary)
        return -1

    words = Dictionary.load(args.dictionary)

    requested_margins = args.margins
    if requested_margins is None:
        print('Automatically computing hyphenation margins from dictionary')
        margins = words.compute_margins()
    else:
        margins = Margins.parse(requested_margins)

    new_project = Project(words, margins)
    new_project.save(args.project)

    # Finish by printing the summary of what was just created.
    return main_show(args)
def main_export(args):
    """Write a project's patterns and exceptions as a TeX patterns file.

    Returns -1 (after printing a message) when ``args.output`` already
    exists.  Otherwise the output file receives a ``\\patterns{...}``
    section with every pattern string and a ``\\hyphenation{...}``
    section with every word reported by ``patternset.errors``.  When
    ``args.patterns`` / ``args.exceptions`` are set, the raw patterns /
    raw exceptions are also written to those files, one per line.

    Returns:
        0 on success, -1 if the output file already exists.
    """
    print('Exporting patterns from', args.project, 'and saving them in TeX format to', args.output)

    if os.path.exists(args.output):
        print('Pattern file already exists! Delete it first, or change the name. Pattern file: %s' % args.output)
        return -1

    project = Project.load(args.project)
    pattern_strings = list(project.patternset.pattern_strings())
    exceptions = list(project.patternset.errors(project.dictionary, project.margins))

    def write_patterns(sink):
        # One pattern string per line.
        for pattern in pattern_strings:
            sink.write(pattern + '\n')

    def write_exceptions(sink):
        # Only the word and its reference hyphens are exported.
        for entry in exceptions:
            word, hyphens = entry[0], entry[1]
            sink.write(format_dictionary_word(word, hyphens) + '\n')

    with codecs.open(args.output, 'w', 'utf-8') as tex:
        tex.write('\\patterns{\n')
        write_patterns(tex)
        tex.write('}\n')
        tex.write('\\hyphenation{\n')
        write_exceptions(tex)
        tex.write('}\n')

    print('Created TeX patterns file', args.output)
    print('Number of patterns:', len(pattern_strings))
    print('Number of exceptions:', len(exceptions))

    if args.patterns:
        print()
        with codecs.open(args.patterns, 'w', 'utf-8') as raw:
            write_patterns(raw)
        print('Written raw patterns to', args.patterns)

    if args.exceptions:
        print()
        with codecs.open(args.exceptions, 'w', 'utf-8') as raw:
            write_exceptions(raw)
        print('Written raw exceptions to', args.exceptions)

    print()
    return 0
def get_heuristic(b, g, r, t, hfunc):
    """Train one layer on the 'bds' project and score it with ``hfunc``.

    The layer is trained with ``Selector(g, b, t)`` and ``Range(1, r)``.
    The project's resulting ``false`` (false positives) and ``missed``
    (false negatives) counts are passed to ``hfunc`` in that order, and
    its result is returned.
    """
    # Set-up: load the training project and keep a copy of its dictionary.
    project = Project.load('bds')
    dictionary_copy = project.dictionary.clone()
    selector = Selector(g, b, t)
    patlen_range = Range(1, r)

    # Generate the patterns for these parameters.
    project.train_new_layer(patlen_range, selector)
    false_count, missed_count = project.false, project.missed

    # Put the untouched dictionary copy back (possibly unnecessary).
    project.dictionary = dictionary_copy

    return hfunc(false_count, missed_count)
def main_export(args):
    """Export patterns and exceptions of a project in TeX format.

    If ``args.output`` already exists a message is printed and -1 is
    returned.  Otherwise the file is created with a ``\\patterns{`` block
    (all pattern strings) followed by a ``\\hyphenation{`` block (all
    words returned by ``patternset.errors``, formatted with
    ``format_dictionary_word``).  ``args.patterns`` and
    ``args.exceptions`` optionally name extra files that receive the raw
    patterns and raw exceptions.

    Returns:
        0 on success, -1 if the output file already exists.
    """
    target = args.output
    print('Exporting patterns from', args.project, 'and saving them in TeX format to', target)

    if os.path.exists(target):
        print('Pattern file already exists! Delete it first, or change the name. Pattern file: %s' % target)
        return -1

    project = Project.load(args.project)
    pattern_strings = list(project.patternset.pattern_strings())
    exceptions = list(project.patternset.errors(project.dictionary, project.margins))

    with codecs.open(target, 'w', 'utf-8') as out:
        out.write('\\patterns{\n')
        for pattern in pattern_strings:
            out.write(pattern + '\n')
        out.write('}\n')

        out.write('\\hyphenation{\n')
        for word, hyphens, _missed, _false in exceptions:
            out.write(format_dictionary_word(word, hyphens) + '\n')
        out.write('}\n')

    print('Created TeX patterns file', target)
    print('Number of patterns:', len(pattern_strings))
    print('Number of exceptions:', len(exceptions))

    raw_patterns_path = args.patterns
    if raw_patterns_path:
        print()
        with codecs.open(raw_patterns_path, 'w', 'utf-8') as out:
            for pattern in pattern_strings:
                out.write(pattern + '\n')
        print('Written raw patterns to', raw_patterns_path)

    raw_exceptions_path = args.exceptions
    if raw_exceptions_path:
        print()
        with codecs.open(raw_exceptions_path, 'w', 'utf-8') as out:
            for word, hyphens, _missed, _false in exceptions:
                out.write(format_dictionary_word(word, hyphens) + '\n')
        print('Written raw exceptions to', raw_exceptions_path)

    print()
    return 0
def optimise_level_driver(b, g, r, t, hfunc_o, hfunc_e):
    """Optimise and commit pattern levels 1 through 9 on the 'bds' project.

    For every level the current ``(b, g, r)`` triple is refined with
    ``optimise_level.optimise_level``, using ``hfunc_o`` on odd levels and
    ``hfunc_e`` on even levels.  A layer is then trained with
    ``Selector(g, b, t)`` and ``Range(1, r)`` and committed to the project.

    Args:
        b, g, r: starting parameters; each level starts from the values
            produced by the previous one.
        t: passed through to ``optimise_level`` and to ``Selector``.
        hfunc_o: heuristic function used for odd levels.
        hfunc_e: heuristic function used for even levels.

    Returns:
        dict mapping each level number to the ``(b, g, r)`` chosen for it.
    """
    level_bgr_map = {}

    # Levels are numbered from 1, so the first level is an odd one.
    # `range` works on both Python 2 and 3 (`xrange` exists only on 2).
    for i in range(1, 10):
        hfunc = hfunc_e if i % 2 == 0 else hfunc_o
        b, g, r = optimise_level.optimise_level(b, g, r, t, hfunc)
        print(b, g, r)
        level_bgr_map[i] = (b, g, r)

        # Train the layer with the chosen parameters and commit it.
        p = Project.load('bds')
        s = Selector(g, b, t)
        # Not named `range`: that would shadow the builtin used by the loop.
        patlen_range = Range(1, r)
        layer = p.train_new_layer(patlen_range, s)
        p.commit(layer)
        # Parenthesised form is valid on Python 2 and Python 3 alike.
        print("Commiting Layer")

    return level_bgr_map
def main_hyphenate(args):
    """Hyphenate every word of the input using a project's patterns.

    Words are read one per line from ``args.input`` (the stdin file
    descriptor when it is empty); blank lines are skipped.  Each word is
    hyphenated with the project's pattern set and margins, formatted with
    ``format_dictionary_word`` and written on its own line to
    ``args.output`` (the stdout file descriptor when it is empty).

    Returns:
        0 on completion.
    """
    print('Hyphenating', args.input, 'into', args.output, 'using project', args.project)
    project = Project.load(args.project)

    with codecs.open(args.input or sys.stdin.fileno(), 'r', 'utf-8') as reader:
        with codecs.open(args.output or sys.stdout.fileno(), 'w', 'utf-8') as writer:
            for raw_line in reader:
                word = raw_line.strip()
                if word:
                    breaks = project.patternset.hyphenate(word, margins=project.margins)
                    writer.write(format_dictionary_word(word, breaks) + '\n')

    print()
    return 0
def test(self):
    """Train two layers on a small dictionary and pin the resulting patterns."""
    # NOTE(review): the dictionary literal is shown on a single line in this
    # view; confirm Dictionary.from_string accepts this layout.
    dictionary = Dictionary.from_string(''' lo-rem ip-sum do-l-or sit a-met con-sec-te-tur adi-pis-cing elit ves-ti-bu-l-um eu-is-mod di-am eg-et bi-b-en-d-um ''')
    project = Project(dictionary)
    rng = Range.parse('1-2')
    selector = Selector.parse('1:1:1')

    # First layer: expected to miss nothing but leave 3 false hyphens.
    project.train_new_layer(rng, selector)
    self.assertEqual(1, len(project.patternset))
    self.assertEqual(project.missed, 0)
    self.assertEqual(project.false, 3)

    # Second layer, same range and selector: expected to remove the false ones.
    project.train_new_layer(rng, selector)
    self.assertEqual(2, len(project.patternset))
    self.assertEqual(project.missed, 0)
    self.assertEqual(project.false, 0)

    # The exported pattern strings are compared as an exact, ordered list.
    patterns = list(project.patternset.pattern_strings())
    self.assertEqual(patterns, [
        '.e2', '1a', '1a1m', '1b', '1b1e', 'bi1', '1bu1', '1ci', 'co2', 'c1t',
        'di1', 'do1', '1d1u', 'ec1', 'eg1', 'e2l', '1en1', 'es1', 'e1t', 'eu1',
        'g1', 'g1e', 'i1a', 'i1b', 'is1', '1l', '2li', '1lo1', '1l1u', '1m',
        '1me', '1mo', '2n1', 'n1d1', '2n1s', 'o1', 'o1l1', 'o2n1', 'o1r', '1pi',
        'p1s2', '1r', '1re', 's1c', '1se', 's1m', 's1t', '1s2u', '1t', '1te1',
        '1ti1', '1tu', 'u1', 'u1i', 'u1l1', '1um'
    ])
def main_compact(args):
    """Run ``compact()`` on a project's pattern set and print the effect.

    For each layer the pattern count before and after compaction is
    printed on one line.  The compacted project is saved only when
    ``args.commit`` is truthy.

    Returns:
        0 on completion.
    """
    print('Compacting hyphenation patterns for', args.project)
    project = Project.load(args.project)

    sizes_before = []
    for layer in project.patternset:
        sizes_before.append(layer.compute_num_patterns())

    project.patternset.compact()

    sizes_after = []
    for layer in project.patternset:
        sizes_after.append(layer.compute_num_patterns())

    print('Result:')
    level = 0
    for before, after in zip(sizes_before, sizes_after):
        level += 1  # levels are reported 1-based
        print('\tLevel %s: %6d => %6d' % (level, before, after))

    if args.commit:
        project.save(args.project)
        status = '...Committed'
    else:
        status = '...Project NOT changed (use --commit flag to save changes)'
    print(status)

    print()
    return 0
def main_hyphenate(args):
    """Write the hyphenated form of each input word to the output.

    Input comes from ``args.input`` or, when that is empty, the stdin
    file descriptor; output goes to ``args.output`` or the stdout file
    descriptor.  Every non-blank line is treated as one word, hyphenated
    with the project's patterns and margins, and written on a line of its
    own.

    Returns:
        0 on completion.
    """
    print('Hyphenating', args.input, 'into', args.output, 'using project', args.project)
    project = Project.load(args.project)
    patternset = project.patternset

    with codecs.open(args.input or sys.stdin.fileno(), 'r', 'utf-8') as source:
        with codecs.open(args.output or sys.stdout.fileno(), 'w', 'utf-8') as sink:
            cleaned = (entry.strip() for entry in source)
            for word in cleaned:
                if not word:
                    # Blank line: nothing to hyphenate.
                    continue
                prediction = patternset.hyphenate(word, margins=project.margins)
                formatted = format_dictionary_word(word, prediction)
                sink.write(formatted + '\n')

    print()
    return 0
def main_test(args):
    """Test a project on a dictionary; optionally save the errors found.

    ``do_test`` is run for the project loaded from ``args.project`` on
    the dictionary loaded from ``args.dictionary``.  If ``args.errors``
    is set, each error is written there in dictionary-word form; if
    ``args.patterns`` is set, each error is written there in pattern
    form.  The error list is computed separately for each file.

    Returns:
        0 on completion.
    """
    project_path, dictionary_path = args.project, args.dictionary
    print('Testing', project_path, 'on dictionary', dictionary_path)

    project = Project.load(project_path)
    dictionary = Dictionary.load(dictionary_path)

    print('Performance of', project_path, 'on', dictionary_path)
    do_test(project, dictionary)

    errors_path = args.errors
    if errors_path:
        with codecs.open(errors_path, 'w', 'utf-8') as report:
            found = project.patternset.errors(dictionary, project.margins)
            for word, hyphens, missed, false in found:
                entry = format_dictionary_word(word, hyphens, missed, false)
                report.write(entry + '\n')
        print('Saved errors to', errors_path)

    patterns_path = args.patterns
    if patterns_path:
        with codecs.open(patterns_path, 'w', 'utf-8') as report:
            found = project.patternset.errors(dictionary, project.margins)
            # The reference hyphens are not part of the pattern form.
            for word, _hyphens, missed, false in found:
                entry = format_word_as_pattern(word, missed, false)
                report.write(entry + '\n')
        print('Saved errors to', patterns_path)

    print()
    return 0
def main_train(args):
    """Train a new pattern layer and, on request, commit and save it.

    The range and selector are parsed from ``args.range`` and
    ``args.selector``.  After training, missed hyphens are reported
    relative to the hyphen total read before training, and false hyphens
    relative to the project's ``total_nonhyphens``.  With ``args.commit``
    the trained layer is committed to the project and the project saved.

    Returns:
        0 on completion.
    """
    print('Training project', args.project, 'using range', args.range, 'and selector', args.selector)
    project = Project.load(args.project)

    next_level = len(project.patternset) + 1
    # An even next level (odd number of existing layers) is an inhibiting one.
    if next_level % 2 == 0:
        print('Training INHIBINTING pattern layer (level=%s)' % next_level)
    else:
        print('Training HYPHENATION pattern layer (level=%s)' % next_level)

    length_range = Range.parse(args.range)
    picked_selector = Selector.parse(args.selector)
    print('\tpattern lengths:', length_range)
    print('\tselector:', args.selector)

    # Read the hyphen total before training starts.
    hyphens_before = project.total_hyphens
    new_layer = project.train_new_layer(length_range, picked_selector)

    missed_now = project.missed
    false_now = project.false
    print('Missed (weighted):', missed_now, percent(missed_now, hyphens_before))
    print('False (weighted):', false_now, percent(false_now, project.total_nonhyphens))

    if not args.commit:
        print('...Projects NOT changed (use --commit flag to save changes)')
    else:
        project.commit(new_layer)
        project.save(args.project)
        print('...Committed!')

    print()
    return 0
def main_explain(args):
    """Explain how each input word is hyphenated by a project.

    Reads one word per line from ``args.input`` (falling back to the
    stdin file descriptor), ignores blank lines, and for every word
    writes to ``args.output`` (falling back to the stdout file
    descriptor): the formatted hyphenation, its ``unicode-escape``
    rendering, and the text produced by ``Explain.format()`` followed by
    a blank line.

    Returns:
        0 on completion.
    """
    print('Explaining hyphenation of', args.input, 'into', args.output, 'using project', args.project)
    project = Project.load(args.project)

    with codecs.open(args.input or sys.stdin.fileno(), 'r', 'utf-8') as source:
        with codecs.open(args.output or sys.stdout.fileno(), 'w', 'utf-8') as sink:
            for entry in source:
                word = entry.strip()
                if word:
                    explanation = Explain()
                    hyphens = project.patternset.hyphenate_explain(
                        word, margins=project.margins, explain=explanation)
                    text = format_dictionary_word(word, hyphens)
                    sink.write(text + '\n')
                    # Same line again with non-ASCII characters escaped.
                    escaped = text.encode('unicode-escape')
                    sink.write(escaped.decode('ascii') + '\n')
                    sink.write(explanation.format() + '\n\n')

    print()
    return 0
from patgen.project import Project
from patgen.range import Range
from patgen.selector import Selector
import csv
import pprint  # NOTE(review): not used in the visible part of this script

# Grid search over the parameters of two consecutive layers trained on the
# 'bds' project.  Each combination produces one CSV row:
# (r1, g1, b1, r2, g2, b2, total pattern count, missed, false).
p = Project.load('bds')
# NOTE(review): the file object handed to csv.writer is never closed
# explicitly in the visible part of this script.
csvw = csv.writer(open('bds.csv', 'w'))

# First layer: Range(1, r1) with Selector(g1, b1, 10).
for r1 in xrange(3, 4):
    for g1 in xrange(1, 5):
        for b1 in xrange(1, 5):
            rg1 = Range(1, r1)
            s1 = Selector(g1, b1, 10)
            # NOTE(review): d1 is not read again in the visible part;
            # presumably a restore step follows -- confirm.
            d1 = p.dictionary.clone()
            p.train_new_layer(rg1, s1)

            # Second layer, trained on top of the first one.
            for r2 in xrange(3, 4):
                for g2 in xrange(1, 5):
                    for b2 in xrange(1, 5):
                        rg2 = Range(1, r2)
                        s2 = Selector(g2, b2, 10)
                        # Copy of the dictionary taken before the second layer.
                        d2 = p.dictionary.clone()
                        p.train_new_layer(rg2, s2)
                        num_patterns = sum(l.compute_num_patterns() for l in p.patternset)
                        csvw.writerow((r1, g1, b1, r2, g2, b2, num_patterns, p.missed, p.false))
                        # Undo the second layer before trying the next combination.
                        p.patternset.pop()
                        p.dictionary = d2