def test_parse_should_return_a_list_with_the_blanks_contents():
    """parse() returns the text found in each blank of the learned template."""
    templater = Templater()
    for sample in ('a b d', 'a e d'):
        templater.learn(sample)

    blanks = templater.parse('a b c d')

    assert blanks == ['', 'b c', '']
def test_should_be_able_to_adjust_minimum_size_of_a_block():
    """With min_block_size=2 the learned template keeps only ' and ' as text."""
    templater = Templater(min_block_size=2)
    for sample in ('git and pyth', 'eggs and spam'):
        templater.learn(sample)

    learned = templater._template

    assert [None, ' and ', None] == learned
def test_join_should_fill_the_blanks_with_elements_received():
    """join() applied to the output of parse() rebuilds the parsed text."""
    templater = Templater()
    for sample in ('a b d', 'a e d'):
        templater.learn(sample)

    blanks = templater.parse('a b c d')
    rebuilt = templater.join(blanks)

    assert rebuilt == 'a b c d'
def test_new_learn_text_trying_to_delete_some_variable():
    """Learning a text with one blank left empty keeps the template unchanged."""
    templater = Templater()
    samples = (
        '<b> a and b </b>',
        '<b> c and d </b>',
        '<b> e and </b>',  # nothing between ' and ' and ' </b>'
    )
    for sample in samples:
        templater.learn(sample)

    learned = templater._template

    assert learned == [None, '<b> ', None, ' and ', None, ' </b>', None]
def test_template_with_named_markers_should_not_be_able_to_learn():
    """A Templater built from a named-marker template must refuse to learn.

    Raises:
        AssertionError: if ``learn`` does not raise ``NotImplementedError``.
    """
    t = Templater(template='{{one}}<u>{{two}}</u>{{three}}',
                  marker=regexp_marker)
    try:
        t.learn('a<u>b</u>c')
    except NotImplementedError:
        pass
    else:
        # Show the template that resulted, to help diagnose the failure.
        print(t._template)
        # Explicit raise instead of an always-false ``assert``: an assert is
        # stripped under ``python -O``, which would make this test vacuous.
        raise AssertionError('NotImplementedError not raised')
def test_join_with_less_parameters_than_variables_should_raise_AttributeError():
    """join() with fewer elements than the template expects raises AttributeError.

    Raises:
        AssertionError: if ``join`` does not raise ``AttributeError``.
    """
    template = Templater()
    template.learn('a b d')
    template.learn('a e d')
    try:
        # The return value is irrelevant; only the raised exception matters.
        template.join([''])
    except AttributeError:
        pass
    else:
        # Explicit raise instead of an always-false ``assert``: an assert is
        # stripped under ``python -O``, which would make this test vacuous.
        raise AssertionError('AttributeError not raised!')
def test_template_with_named_markers_should_not_be_able_to_learn():
    """A Templater built from a named-marker template must refuse to learn.

    Raises:
        AssertionError: if ``learn`` does not raise ``NotImplementedError``.
    """
    t = Templater(template='{{one}}<u>{{two}}</u>{{three}}',
                  marker=regexp_marker)
    try:
        t.learn('a<u>b</u>c')
    except NotImplementedError:
        pass
    else:
        # Single-argument print(...) behaves the same on Python 2 and 3; the
        # bare ``print x`` statement is a SyntaxError on Python 3.
        print(t._template)
        # Explicit raise instead of an always-false ``assert``: an assert is
        # stripped under ``python -O``, which would make this test vacuous.
        raise AssertionError('NotImplementedError not raised')
def find_splits(self, column: List[Cell]) -> Iterable[CompoundSplit]:
    """Yield candidate splits of a column using a template learned from it.

    A ``Templater`` (configured with ``self.min_block_size``) is taught the
    text of every cell. If the learned template has any truthy element, each
    cell is parsed against it, the stripped pieces are transposed into new
    columns, and one ``CompoundSplit`` is yielded per new column accepted by
    ``self.col_is_ok``.

    Args:
        column: Cells of one column. Each cell's ``"text"`` value is used,
            defaulting to ``""`` when the key is absent.

    Yields:
        ``CompoundSplit(prefix, "string", cells)`` where ``cells`` is a list
        of ``{"text": value}`` dicts. ``prefix`` is the stripped template
        element at the new column's index when that is non-empty and not
        numeric; otherwise it is the index as a string. A numeric template
        element is prepended to every value of the new column instead.

    Nothing is yielded if any cell cannot be learned; parsing failures are
    logged at debug level and end the generator.
    """
    from templater import Templater

    texts = [c.get("text", "") for c in column]
    template = Templater(min_block_size=self.min_block_size)
    for cell in texts:
        try:
            template.learn(cell)
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            log.debug(f"Failed to add {cell} to template")
            return

    if not any(template._template):
        return

    log.debug(f"Template found: {template._template}")
    try:
        newrows = []
        for cell in texts:
            newrows.append(list(map(str.strip, template.parse(cell))))

        # zip(*rows) transposes the per-cell pieces into per-position columns.
        for i, newcol in enumerate(zip(*newrows)):
            if not self.col_is_ok(newcol):
                continue
            # NOTE(review): if template._template[i] can be None, .strip()
            # raises here and the handler below ends the generator. Confirm
            # that indexing the template by parsed-column index is intended.
            prefix = template._template[i].strip()
            if prefix:
                if prefix.isnumeric():
                    # TODO: check numeric suffix
                    newcol = tuple([prefix + c for c in newcol])
                    prefix = str(i)
            else:
                prefix = str(i)
            yield CompoundSplit(
                prefix, "string", [{"text": c} for c in newcol]
            )
    except Exception:
        log.debug(f"Failed to parse {cell} using template {template._template}")
#!/usr/bin/env python
# coding: utf-8
"""Time how long Templater takes to learn from, then parse, html/*.html."""

from time import time
from glob import glob

from templater import Templater

files = glob('html/*.html')  # You must have some .html files in html/
template = Templater()

print('Time to learn')
start = time()
for filename in files:
    # end=' ' keeps the cursor on this line so 'OK' is appended once the
    # file has been learned; a trailing comma inside print(...) does nothing.
    print(' Learning "%s"...' % filename, end=' ')
    # ``with`` closes the file even if read() or learn() raises.
    with open(filename) as fp:
        template.learn(fp.read())
    print('OK')
end = time()
print(' Time:', end - start)

print('Template created:')
print(template._template)

print('Now, work!')
start = time()
for filename in files:
    print(' Parsing "%s"...' % filename)
    with open(filename) as fp:
        print(' Result:', template.parse(fp.read()))
#!/usr/bin/env python
# coding: utf-8
"""Show how min_block_size changes the template learned from two strings."""
# print_function keeps the multi-argument print() calls below working on
# Python 2 as well; the bare ``print x`` statement is a SyntaxError on Python 3.
from __future__ import print_function

from templater import Templater

str_1 = 'my favorite color is blue'
str_2 = 'my favorite color is violet'
print('Learning from:')
print(' ', str_1)
print(' ', str_2)

t = Templater()  # default min_block_size = 1
t.learn(str_1)
t.learn(str_2)
print('Template for min_block_size=1 (default):')
print(' ', t._template)

t = Templater(min_block_size=2)
t.learn(str_1)
t.learn(str_2)
print('Template for min_block_size=2:')
print(' ', t._template)
#!/usr/bin/env python
# coding: utf-8
"""Save a learned template two ways, reload each copy and parse with it."""

from os import unlink

from templater import Templater

# Will hold the template string with blanks filled by '|||'.
MARKED_FILE = 'my-little-template.html'
# Will hold the pickle of the Templater object.
PICKLE_FILE = 'my-template.tpl'

learner = Templater()
for sample in ('<b>spam</b>', '<b>eggs</b>', '<b>ham</b>'):
    learner.learn(sample)

learner.save(MARKED_FILE, marker='|||')  # will put a `\n` in the EOF
learner.dump(PICKLE_FILE)
print(learner.parse('<b>parsing using first template object</b>'))

# open() removes `\n`/`\r\n` in the end of file before creating the
# template definition.
from_marked = Templater.open(MARKED_FILE, marker='|||')
print(from_marked.parse('<b>parsing using second template object</b>'))

from_pickle = Templater.load(PICKLE_FILE)
print(from_pickle.parse('<b>parsing using third template object</b>'))

# Removing files:
for path in (MARKED_FILE, PICKLE_FILE):
    unlink(path)
#!/usr/bin/env python
# coding: utf-8
"""Save a learned template two ways, reload each copy and parse with it."""

from os import unlink

from templater import Templater

t = Templater()
t.learn('<b>spam</b>')
t.learn('<b>eggs</b>')
t.learn('<b>ham</b>')
t.save('my-little-template.html', marker='|||')  # will put a `\n` in the EOF
t.dump('my-template.tpl')
# Single-argument print(...) behaves the same on Python 2 and 3; the bare
# ``print x`` statement is a SyntaxError on Python 3.
print(t.parse('<b>parsing using first template object</b>'))

t2 = Templater.open('my-little-template.html', marker='|||')
# it removes `\n`/`\r\n` in the end of file before creating template definition
print(t2.parse('<b>parsing using second template object</b>'))

t3 = Templater.load('my-template.tpl')
print(t3.parse('<b>parsing using third template object</b>'))

# 'my-little-template.html' will have the template string with blanks filled by
# '|||'
# 'my-template.tpl' will have the pickle of Templater object

# Removing files:
unlink('my-little-template.html')
unlink('my-template.tpl')
#!/usr/bin/env python # coding: utf-8 from time import time from glob import glob from templater import Templater files = glob('html/*.html') # You must have some .html files in html/ template = Templater() print 'Time to learn' start = time() for filename in files: print ' Learning "%s"...' % filename, fp = open(filename) template.learn(fp.read()) fp.close() print 'OK' end = time() print ' Time:', end - start print 'Template created:' print template._template print 'Now, work!' start = time() for filename in files: print ' Parsing "%s"...' % filename fp = open(filename) print ' Result:', template.parse(fp.read()) fp.close()
#!/usr/bin/env python
# coding: utf-8
"""Learn a template from three texts, parse one of them, then fill the blanks."""

from templater import Templater

texts_to_learn = [
    '<b> spam and eggs </b>',
    '<b> ham and spam </b>',
    '<b> white and black </b>',
]
# The last learned text is also the one that gets parsed.
text_to_parse = texts_to_learn[-1]

template = Templater()
for sample in texts_to_learn:
    print('Learning "%s"...' % sample)
    template.learn(sample)
print('Template created:', template._template)

print('Parsing text "%s"...' % text_to_parse)
parsed = template.parse(text_to_parse)
print(' Result:', parsed)

filled = template.join(['', 'yellow', 'blue', ''])
print('Filling the blanks:', filled)
#!/usr/bin/env python
# coding: utf-8
"""Learn a template from three texts, parse one of them, then fill the blanks."""
# print_function keeps the multi-argument print() calls below working on
# Python 2 as well; the bare ``print x`` statement is a SyntaxError on Python 3.
from __future__ import print_function

from templater import Templater

texts_to_learn = ['<b> spam and eggs </b>',
                  '<b> ham and spam </b>',
                  '<b> white and black </b>']
# The last learned text is also the one that gets parsed.
text_to_parse = texts_to_learn[-1]

template = Templater()
for text in texts_to_learn:
    print('Learning "%s"...' % text)
    template.learn(text)
print('Template created:', template._template)

print('Parsing text "%s"...' % text_to_parse)
print(' Result:', template.parse(text_to_parse))
print('Filling the blanks:', template.join(['', 'yellow', 'blue', '']))