def test_parse_should_return_a_list_with_the_blanks_contents():
    """Parsing a text yields a list holding the content of every blank."""
    templater = Templater()
    for sample in ('a b d', 'a e d'):
        templater.learn(sample)

    # The varying middle part comes back as one entry; the first and last
    # entries of the parsed list are empty strings.
    assert templater.parse('a b c d') == ['', 'b c', '']
def test_join_should_fill_the_blanks_with_elements_received():
    """Joining the blanks obtained from ``parse`` rebuilds the parsed text."""
    source_text = 'a b c d'

    templater = Templater()
    for sample in ('a b d', 'a e d'):
        templater.learn(sample)

    # Round trip: parse the text into blanks, then feed them back to join.
    blanks = templater.parse(source_text)
    assert templater.join(blanks) == source_text
def get_shows_from_html(html_content):
    """Build the list of shows described in ``html_content``.

    The page is parsed with the template stored in ``src/template.html``
    (under ``PROJECT_ROOT``), using ``{{name}}`` as named markers. The
    ``info`` field of the parsed result is split on ``</blockquote>``; each
    piece has its HTML tags removed and is split into lines, which are handed
    to ``Show.from_raw_data``.

    Args:
        html_content: HTML text of the page to parse.

    Returns:
        A list with one ``Show.from_raw_data`` result per piece that could be
        converted; pieces raising ``ValueError`` are skipped.
    """
    regexp_marker = re.compile(r'{{([a-zA-Z0-9_-]*)}}')
    regexp_tags = re.compile(r'<[^>]*?>')

    template_path = os.path.join(PROJECT_ROOT, 'src', 'template.html')
    # Use a context manager so the file handle is always released; the
    # previous code left the file open.
    with open(template_path) as template_fp:
        template_text = template_fp.read().strip()

    template = Templater(template_text, marker=regexp_marker)
    data = template.parse(html_content)

    shows = []
    for evento in data['info'].split('</blockquote>'):
        raw_show = regexp_tags.sub('', evento).strip().split('\n')
        try:
            shows.append(Show.from_raw_data(raw_show))
        except ValueError:
            # Best effort: a piece that cannot be turned into a Show is
            # skipped rather than aborting the whole page.
            continue
    return shows
def find_splits(self, column: List[Cell]) -> Iterable[CompoundSplit]:
    """Yield sub-columns obtained by learning one template over all cells.

    The ``"text"`` of every cell (``""`` when missing) is fed to a
    ``Templater``. If a non-empty template results, every cell text is parsed
    with it and the parsed parts are regrouped column-wise; each new column
    accepted by ``self.col_is_ok`` is yielded as a ``CompoundSplit``.

    Args:
        column: Cells of the column to split; each must support
            ``.get("text", "")``.

    Yields:
        ``CompoundSplit(prefix, "string", cells)`` where ``cells`` is a list
        of ``{"text": value}`` dicts.

    Nothing is yielded when learning fails for any cell or when no template
    is found. Parse failures are logged at debug level and end the iteration.
    """
    from templater import Templater

    column = [c.get("text", "") for c in column]

    template = Templater(min_block_size=self.min_block_size)
    for cell in column:
        try:
            template.learn(cell)
        except Exception:
            # `except Exception` instead of a bare `except` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            log.debug(f"Failed to add {cell} to template")
            return

    if not any(template._template):
        return

    log.debug(f"Template found: {template._template}")
    try:
        newrows = []
        # Explicit loop (not a comprehension) so `cell` still names the
        # failing cell in the log message below.
        for cell in column:
            newrows.append([part.strip() for part in template.parse(cell)])

        # Transpose: one tuple per blank, holding that blank for every cell.
        for i, newcol in enumerate(zip(*newrows)):
            if not self.col_is_ok(newcol):
                continue

            prefix = template._template[i].strip()
            # NOTE(review): nesting reconstructed as "keep a non-numeric
            # prefix as the name; otherwise name the column by its index" -
            # confirm against the original layout.
            if prefix:
                if prefix.isnumeric():
                    # TODO: check numeric suffix
                    newcol = tuple(prefix + c for c in newcol)
                    prefix = str(i)
            else:
                prefix = str(i)

            yield CompoundSplit(prefix, "string", [{"text": c} for c in newcol])
    except Exception:
        log.debug(f"Failed to parse {cell} using template {template._template}")
"""Time how long Templater takes to learn from, then parse, a set of files."""
from time import time
from glob import glob

from templater import Templater

files = glob('html/*.html')  # You must have some .html files in html/
template = Templater()

print('Time to learn')
start = time()
for filename in files:
    # Stay on the same line so that "OK" is appended once learning is done;
    # a trailing comma inside print(...) does not suppress the newline.
    print(' Learning "%s"...' % filename, end=' ', flush=True)
    # The context manager closes the file even if learn() raises.
    with open(filename) as fp:
        template.learn(fp.read())
    print('OK')
end = time()
print(' Time:', end - start)

print('Template created:')
print(template._template)

print('Now, work!')
start = time()
for filename in files:
    print(' Parsing "%s"...' % filename)
    with open(filename) as fp:
        print(' Result:', template.parse(fp.read()))
end = time()
print(' Time: ', end - start)
#!/usr/bin/env python
# coding: utf-8
"""Example: persist a learned template as marked text and as a dump file."""
from os import unlink
from templater import Templater

MARKED_TEXT_FILE = 'my-little-template.html'
DUMP_FILE = 'my-template.tpl'

learned = Templater()
for sample in ('<b>spam</b>', '<b>eggs</b>', '<b>ham</b>'):
    learned.learn(sample)

learned.save(MARKED_TEXT_FILE, marker='|||')  # will put a `\n` in the EOF
learned.dump(DUMP_FILE)
print(learned.parse('<b>parsing using first template object</b>'))

# it removes `\n`/`\r\n` in the end of file before creating template
# definition
reopened = Templater.open(MARKED_TEXT_FILE, marker='|||')
print(reopened.parse('<b>parsing using second template object</b>'))

reloaded = Templater.load(DUMP_FILE)
print(reloaded.parse('<b>parsing using third template object</b>'))

# 'my-little-template.html' will have the template string with blanks filled
# by '|||'
# 'my-template.tpl' will have the pickle of Templater object

# Removing files:
for leftover in (MARKED_TEXT_FILE, DUMP_FILE):
    unlink(leftover)
#!/usr/bin/env python
# coding: utf-8
"""Example: save a learned template to disk and read it back.

Uses single-argument ``print(...)`` calls, which behave the same under
Python 2 and Python 3.
"""
from os import unlink
from templater import Templater

t = Templater()
t.learn('<b>spam</b>')
t.learn('<b>eggs</b>')
t.learn('<b>ham</b>')
t.save('my-little-template.html', marker='|||')  # will put a `\n` in the EOF
t.dump('my-template.tpl')
print(t.parse('<b>parsing using first template object</b>'))

# it removes `\n`/`\r\n` in the end of file before creating template
# definition
t2 = Templater.open('my-little-template.html', marker='|||')
print(t2.parse('<b>parsing using second template object</b>'))

# NOTE(review): the .tpl file is a pickle (see below), so only load files
# from a trusted source.
t3 = Templater.load('my-template.tpl')
print(t3.parse('<b>parsing using third template object</b>'))

# 'my-little-template.html' will have the template string with blanks filled
# by '|||'
# 'my-template.tpl' will have the pickle of Templater object

# Removing files:
unlink('my-little-template.html')
unlink('my-template.tpl')
#!/usr/bin/env python
# coding: utf-8
"""Example: build a template whose blanks are named through a regexp marker."""
from re import compile as re_compile
from templater import Templater

# match ''{{var}}''
NAMED_MARKER = re_compile(r'{{([a-zA-Z0-9_-]*)}}')
TEMPLATE_TEXT = '{{first-var}}<b>{{second-var}}</b>{{third-var}}'

# regexp marker also works for Templater.open to specify named markers
named_template = Templater(TEMPLATE_TEXT, marker=NAMED_MARKER)

parsed = named_template.parse('This <b> is </b> a test.')  # returns a dict
print(parsed)

# NOTE(review): presumably the marker is a format pattern that renders each
# blank as {{name}} - confirm against Templater.save.
named_template.save('template-with-named-markers.html', marker='{{{{{}}}}}')
"""Time how long Templater takes to learn from, then parse, a set of files.

Written so that it runs, with the same output, under Python 2 and Python 3:
every ``print`` receives a single pre-formatted string.
"""
import sys
from time import time
from glob import glob

from templater import Templater

files = glob('html/*.html')  # You must have some .html files in html/
template = Templater()

print('Time to learn')
start = time()
for filename in files:
    # Write without a newline so that "OK" ends up on the same line.
    sys.stdout.write(' Learning "%s"... ' % filename)
    sys.stdout.flush()
    # The context manager closes the file even if learn() raises.
    with open(filename) as fp:
        template.learn(fp.read())
    print('OK')
end = time()
print(' Time: %s' % (end - start))

print('Template created:')
print(template._template)

print('Now, work!')
start = time()
for filename in files:
    print(' Parsing "%s"...' % filename)
    with open(filename) as fp:
        parsed = template.parse(fp.read())
    # One-element tuple so a tuple result is not unpacked by `%`.
    print(' Result: %s' % (parsed,))
end = time()
print(' Time:  %s' % (end - start))
#!/usr/bin/env python
# coding: utf-8
"""Example: learn a template from sample texts, parse one, refill blanks."""
from templater import Templater

samples = [
    '<b> spam and eggs </b>',
    '<b> ham and spam </b>',
    '<b> white and black </b>',
]
# The last sample doubles as the text to parse.
target = samples[-1]

learner = Templater()
for sample in samples:
    print('Learning "%s"...' % sample)
    learner.learn(sample)
print('Template created:', learner._template)

print('Parsing text "%s"...' % target)
blanks = learner.parse(target)
print(' Result:', blanks)

replacement = ['', 'yellow', 'blue', '']
print('Filling the blanks:', learner.join(replacement))
def test_should_be_able_to_add_headers_to_a_template_without_named_markers():
    """After ``add_headers``, parsing returns a dict keyed by the headers."""
    templater = Templater(template='|||<u>|||</u>|||', marker='|||')
    templater.add_headers(['one', 'two', 'three'])

    # Each header is paired with the blank at the same position.
    assert templater.parse('a<u>b</u>c') == {
        'one': 'a',
        'two': 'b',
        'three': 'c',
    }
#!/usr/bin/env python
# coding: utf-8
"""Example: learn a template from sample texts, parse one, refill blanks.

Every ``print`` receives a single pre-formatted string so the script runs,
with the same output, under Python 2 and Python 3.
"""
from templater import Templater

texts_to_learn = ['<b> spam and eggs </b>', '<b> ham and spam </b>',
                  '<b> white and black </b>']
text_to_parse = texts_to_learn[-1]

template = Templater()
for text in texts_to_learn:
    print('Learning "%s"...' % text)
    template.learn(text)
# One-element tuples below keep `%` from unpacking a tuple value.
print('Template created: %s' % (template._template,))

print('Parsing text "%s"...' % text_to_parse)
print(' Result: %s' % (template.parse(text_to_parse),))
print('Filling the blanks: %s' % (template.join(['', 'yellow', 'blue', '']),))
#!/usr/bin/env python
# coding: utf-8
"""Example: build a template whose blanks are named through a regexp marker.

Uses a single-argument ``print(...)`` call, which behaves the same under
Python 2 and Python 3.
"""
from re import compile as re_compile
from templater import Templater

regexp_marker = re_compile(r'{{([a-zA-Z0-9_-]*)}}')  # match ''{{var}}''
# regexp marker also works for Templater.open to specify named markers
template = Templater('{{first-var}}<b>{{second-var}}</b>{{third-var}}',
                     marker=regexp_marker)
result = template.parse('This <b> is </b> a test.')  # returns a dict
print(result)

# NOTE(review): presumably the marker is a format pattern that renders each
# blank as {{name}} - confirm against Templater.save.
template.save('template-with-named-markers.html', marker='{{{{{}}}}}')