def test_parse_should_return_a_list_with_the_blanks_contents():
    """parse() returns the text that fills each blank of the learned template."""
    learned = Templater()
    for sample in ('a b d', 'a e d'):
        learned.learn(sample)

    assert learned.parse('a b c d') == ['', 'b c', '']
def create_header(message, filename):
    """
    Create header script.

    Loads ``header.txt`` from the template directory, runs it through
    ``Templater.replace_word`` with the header pattern and passes the
    result to ``Templater.save_as`` together with ``filename``.

    :param message: (dict) pipeline parameters
    :param filename: (str) filename with absolute path
    :return: True when every step completed; any exception raised by a
        step propagates to the caller unchanged
    """
    header_template_path = "{0}{1}/header.txt".format(
        top_level_directory, TEMPLATE_PATH["TEMPLATE"])
    logger.info(
        "header_template_path : '{0}'.".format(header_template_path))

    header_pattern = {
        "$SPARK_CODE_DEPEDENCIES_URL": CONFIG["SPARK_CODE_DEPEDENCIES_URL"],
        "$PIPELINE": str(message)
    }

    # No try/except here: the previous handlers only re-raised what they
    # caught, so letting exceptions propagate is equivalent.
    header_str = Templater.load_template(header_template_path)
    header_str = Templater.replace_word(header_str, header_pattern)
    Templater.save_as(header_str, filename)
    return True
def test_Templater_save_should_save_template_as_a_raw_file_with_markers():
    """save() writes the template with the marker placed in every blank."""
    blocks = [None, '<b>', None, '</b><u>', None, '</u>', None]
    Templater(template=blocks).save('test.html', marker='|||')

    written = read_file_and_delete('test.html')
    assert '|||<b>|||</b><u>|||</u>|||\n' == written
def test_should_be_able_to_adjust_minimum_size_of_a_block():
    """With min_block_size=2 the learned template keeps only ' and '."""
    templater = Templater(min_block_size=2)
    for sample in ('git and pyth', 'eggs and spam'):
        templater.learn(sample)

    assert [None, ' and ', None] == templater._template
def test_save_should_use_python_format_if_marker_is_supplied_and_template_has_named_markers():
    """A marker passed to save() is formatted with each header name."""
    named = Templater(template='{{start}}<u>{{text}}</u>{{end}}',
                      marker=regexp_marker)
    named.save('test.html', marker='[--{}--]')

    written = read_file_and_delete('test.html')
    assert '[--start--]<u>[--text--]</u>[--end--]\n' == written
def test_add_headers_should_raise_ValueError_if_number_of_blanks_differ_from_number_of_headers():
    """add_headers() must raise ValueError for a wrong number of headers."""
    t = Templater(template='|||<u>|||</u>|||', marker='|||')
    try:
        # The template string holds three markers; four headers are passed.
        t.add_headers(['one', 'two', 'three', 'four'])
    except ValueError:
        pass
    else:
        # Raise explicitly so the message shows up in the report and the
        # check is not stripped when Python runs with -O.
        raise AssertionError('ValueError not raised!')
def test_join_should_fill_the_blanks_with_elements_received():
    """join() applied to the output of parse() rebuilds the parsed text."""
    learned = Templater()
    for sample in ('a b d', 'a e d'):
        learned.learn(sample)

    blanks = learned.parse('a b c d')
    assert learned.join(blanks) == 'a b c d'
def test_new_learn_text_trying_to_delete_some_variable():
    """Learning a sample with an empty last variable keeps every blank."""
    learned = Templater()
    samples = ('<b> a and b </b>', '<b> c and d </b>', '<b> e and </b>')
    for sample in samples:
        learned.learn(sample)

    assert learned._template == [None, '<b> ', None, ' and ', None, ' </b>', None]
def test_save_should_use_NAMED_MARKER_if_template_has_named_markers_and_no_marker_supplied():
    """Without a marker argument, save() fills blanks via NAMED_MARKER."""
    named = Templater(template='{{one}}<u>{{two}}</u>{{three}}',
                      marker=regexp_marker)
    named.save('test.html')
    written = read_file_and_delete('test.html')

    # Build the expected text by formatting NAMED_MARKER with each header.
    filled_blanks = [NAMED_MARKER.format(name) for name in named._headers]
    assert named.join(filled_blanks) + '\n' == written
def test_template_with_named_markers_should_not_be_able_to_learn():
    """learn() must raise NotImplementedError on a named-marker template."""
    t = Templater(template='{{one}}<u>{{two}}</u>{{three}}',
                  marker=regexp_marker)
    try:
        t.learn('a<u>b</u>c')
    except NotImplementedError:
        pass
    else:
        # Show the template that learn() produced to help debugging.
        # print(...) with one argument works on Python 2 and Python 3.
        print(t._template)
        raise AssertionError('NotImplementedError not raised')
def test_template_with_named_markers_should_not_be_able_to_learn():
    """learn() must raise NotImplementedError on a named-marker template."""
    t = Templater(template='{{one}}<u>{{two}}</u>{{three}}',
                  marker=regexp_marker)
    try:
        t.learn('a<u>b</u>c')
    except NotImplementedError:
        pass
    else:
        # Show the template that learn() produced to help debugging.
        print(t._template)
        # Raise explicitly so the message shows up in the report and the
        # check is not stripped when Python runs with -O.
        raise AssertionError('NotImplementedError not raised')
def test_passing_headers_with_different_size_from_self_headers_should_raise_AttributeError():
    """save() must raise AttributeError when given five headers for a
    template declared with three named markers."""
    t = Templater(template='{{one}}<u>{{two}}</u>{{three}}',
                  marker=regexp_marker)
    try:
        t.save('test.html', headers=list('abcde'))
    except AttributeError:
        pass
    else:
        # save() succeeded, so remove the file it wrote before failing.
        unlink('test.html')
        # A plain string cannot be raised (that is a TypeError); raise a
        # real exception so the failure is reported as intended.
        raise AssertionError('AttributeError not raised!')
def test_join_with_less_parameters_than_variables_should_raise_AttributeError():
    """join() must raise AttributeError when given a single element for the
    template learned from 'a b d' and 'a e d'."""
    template = Templater()
    template.learn('a b d')
    template.learn('a e d')
    try:
        template.join([''])
    except AttributeError:
        pass
    else:
        # Raise explicitly so the message shows up in the report and the
        # check is not stripped when Python runs with -O.
        raise AssertionError('AttributeError not raised!')
def test_Templater_dump_and_load_should_pickle_and_unpickle():
    """dump() followed by load() restores _template and _min_block_size."""
    blocks = [None, '<b>', None, '</b><u>', None, '</u>', None]
    original = Templater(template=blocks, min_block_size=6)
    original.dump('my-template.tpl')

    restored = Templater.load('my-template.tpl')
    unlink('my-template.tpl')

    assert blocks == restored._template
    assert 6 == restored._min_block_size
def test_Templater_open_should_load_template_from_a_raw_file_with_markers():
    """open() turns a marker-delimited file into the internal block list."""
    write_file('test.html', '|||<b>|||</b><u>|||</u>|||')
    loaded = Templater.open('test.html', marker='|||')
    unlink('test.html')

    assert [None, '<b>', None, '</b><u>', None, '</u>', None] == loaded._template
def get_shows_from_html(html_content):
    """Extract Show objects from ``html_content``.

    The page is parsed with the named-marker template stored in
    ``src/template.html`` under PROJECT_ROOT. The ``info`` field of the
    parse result is split on ``</blockquote>``, tags are stripped from
    each chunk, and the chunk's lines are passed to
    ``Show.from_raw_data``.

    :param html_content: HTML text to parse
    :return: list of the Show objects that could be built
    """
    regexp_marker = re.compile(r'{{([a-zA-Z0-9_-]*)}}')
    regexp_tags = re.compile(r'<[^>]*?>')

    # Context manager closes the template file, which was left open before.
    template_path = os.path.join(PROJECT_ROOT, 'src', 'template.html')
    with open(template_path) as template_fp:
        template_text = template_fp.read().strip()

    template = Templater(template_text, marker=regexp_marker)
    data = template.parse(html_content)

    shows = []
    for evento in data['info'].split('</blockquote>'):
        raw_show = regexp_tags.sub('', evento).strip().split('\n')
        try:
            shows.append(Show.from_raw_data(raw_show))
        except ValueError:
            # Best effort: chunks that from_raw_data rejects are skipped.
            pass
    return shows
def test_Templater_parse_file_should_open_and_parse_a_file_from_filename():
    """parse_file() gives the same blanks whether the file ends in
    ``\\n`` or ``\\r\\n``."""
    template = Templater('+<u>+</u>+', marker='+')
    expected = ['testing ', ' parsing ', ' files']

    for line_ending in ('\n', '\r\n'):
        # The context manager closes the file even if write() fails.
        with open('test.html', 'w') as fp:
            fp.write('testing <u> parsing </u> files' + line_ending)
        result = template.parse_file('test.html')
        # Remove the file before asserting so a failure leaves nothing behind.
        unlink('test.html')
        assert expected == result
def test_should_not_have_named_marks_without_nothing_in_the_middle():
    """open() must raise ValueError when two named markers are adjacent."""
    # Imported locally so this test does not depend on a module-level import.
    from os import unlink

    write_file('test.html', '{{first}}{{second}}<u>{{text}}</u>{{last}}')
    try:
        Templater.open('test.html', marker=regexp_marker)
    except ValueError:
        pass
    else:
        # Raise explicitly so the message shows up in the report and the
        # check is not stripped when Python runs with -O.
        raise AssertionError("ValueError not raised!")
    finally:
        # The fixture file used to be left behind; always remove it.
        unlink('test.html')
def test_Templater_open_should_remove_leading_linefeed_if_there_is_some():
    """open() yields the same template whether the file ends in ``\\n`` or
    ``\\r\\n`` (the line ending written here is at the end of the file)."""
    expected = [None, '<b>', None, '</b><u>', None, '</u>', None]

    for line_ending in ('\n', '\r\n'):
        # The context manager closes the file even if write() fails.
        with open('test.html', 'w') as fp:
            fp.write('|||<b>|||</b><u>|||</u>|||' + line_ending)
        t = Templater.open('test.html', marker='|||')
        # Remove the file before asserting so a failure leaves nothing behind.
        unlink('test.html')
        assert expected == t._template
def run(self, deploy):
    """Run the generation steps in order and serialize the result.

    Steps, as performed below: prepare the deploy, generate the text
    variants with TextGenerator, read the keywords with KWHandler, wrap
    the combined data in DgData, then build a Templater and call
    ``serialize()`` on it.

    :param deploy: passed unchanged to ``__prepare_deploy``
    """
    c = self.config
    self.__prepare_deploy(deploy)

    tg = TextGenerator(c.gen_capitalize, c.gen_shuffle)
    variants = tg.generate(self.text)
    # print(...) with one argument works on Python 2 and Python 3.
    print("[OK] Generated {0} variants of the text".format(len(variants)))

    kwh = KWHandler(c.keys_file, c.gen_keyword, c.gen_grouping)
    print("[OK] Read {0} keywords".format(kwh.count()))

    data = kwh.get_dg_data(variants)
    dgdata = DgData(data, self.categories,
                    footer_links=c.footer_links,
                    pages_in_category=c.pages_in_category)

    # NOTE(review): .decode("utf8") assumes sitename is a byte string —
    # confirm before running on Python 3, where str has no decode().
    t = Templater(template_folder=c.template_folder,
                  templates=c.templates,
                  tgdata=dgdata,
                  deploy_folder=self.deploy_folder,
                  sitename=c.sitename.decode("utf8"),
                  words_in_preview=c.words_in_preview)
    t.serialize()
def test_raise_ValueError_if_there_is_no_named_marker_in_the_end_of_template():
    """open() must raise ValueError when the template does not end with a
    named marker."""
    write_file('test.html', '{{start}}<u>{{text}}</u>')
    try:
        Templater.open('test.html', marker=regexp_marker)
    except ValueError:
        pass
    else:
        # Raise explicitly so the message shows up in the report and the
        # check is not stripped when Python runs with -O.
        raise AssertionError("ValueError not raised!")
    finally:
        # Single cleanup point; it also runs if open() raises something
        # other than ValueError.
        unlink('test.html')
def test_Templater_should_accept_named_markers_in_init():
    """The constructor splits a named-marker string into blocks and headers."""
    named = Templater(template='{{start}}<b>{{middle}}</b>{{end}}',
                      marker=regexp_marker)

    assert [None, '<b>', None, '</b>', None] == named._template
    assert ['start', 'middle', 'end'] == named._headers
def run(self, deploy):
    """Run the generation steps in order and serialize the result.

    Steps, as performed below: prepare the deploy, generate the text
    variants with TextGenerator, read the keywords with KWHandler, wrap
    the combined data in DgData, then build a Templater and call
    ``serialize()`` on it.

    :param deploy: passed unchanged to ``__prepare_deploy``
    """
    c = self.config
    self.__prepare_deploy(deploy)

    tg = TextGenerator(c.gen_capitalize, c.gen_shuffle)
    variants = tg.generate(self.text)
    # print(...) with one argument works on Python 2 and Python 3.
    print("[OK] Generated {0} variants of the text".format(len(variants)))

    kwh = KWHandler(c.keys_file, c.gen_keyword, c.gen_grouping)
    print("[OK] Read {0} keywords".format(kwh.count()))

    data = kwh.get_dg_data(variants)
    dgdata = DgData(data, self.categories,
                    footer_links=c.footer_links,
                    pages_in_category=c.pages_in_category)

    # NOTE(review): .decode("utf8") assumes sitename is a byte string —
    # confirm before running on Python 3, where str has no decode().
    t = Templater(template_folder=c.template_folder,
                  templates=c.templates,
                  tgdata=dgdata,
                  deploy_folder=self.deploy_folder,
                  sitename=c.sitename.decode("utf8"),
                  words_in_preview=c.words_in_preview)
    t.serialize()
def create_footer(message, filename):
    """
    Create footer script.

    Picks the footer pattern from FOOTER_PATTERN according to
    ``message["last_stage"]`` (and ``message["type"]`` for the "fit"
    stage), runs ``footer.txt`` through ``Templater.replace_word`` with
    that pattern and passes the result to ``Templater.save_as`` with
    mode argument "a".

    :param message: (dict) pipeline parameters
    :param filename: (str) filename with absolute path
    :return: True when every step completed
    :raises KeyError: when "last_stage" is neither "fit" nor "transform",
        or when "type" is not a known pipeline type for the "fit" stage
    """
    footer_template_path = "{0}{1}/footer.txt".format(
        top_level_directory, TEMPLATE_PATH["TEMPLATE"])

    if message["last_stage"] == "fit":
        if message["type"] == "GeneralPipeline":
            footer_pattern = FOOTER_PATTERN["GP"]
        elif message["type"] == "CrossValidator":
            footer_pattern = FOOTER_PATTERN["CV"]
        elif message["type"] == "TrainValidationSplit":
            footer_pattern = FOOTER_PATTERN["TVS"]
        else:
            raise KeyError("Unknown key 'type' in message.")
    elif message["last_stage"] == "transform":
        footer_pattern = FOOTER_PATTERN["TRANSFORM"]
    else:
        raise KeyError("Unknown key 'last_stage' in message.")

    # No try/except here: the previous handlers only re-raised what they
    # caught, so letting exceptions propagate is equivalent.
    footer_str = Templater.load_template(footer_template_path)
    footer_str = Templater.replace_word(footer_str, footer_pattern)
    Templater.save_as(footer_str, filename, "a")
    return True
def test_named_markers_should_work():
    """A custom regexp marker with one group produces named blanks."""
    write_file('test.html',
               '|||[first]<b>|||[second]</b><u>|||[third]</u>|||[fourth]')
    loaded = Templater.open('test.html',
                            marker=re_compile(r'\|\|\|\[([^\]]+)\]'))
    unlink('test.html')

    assert loaded._template == [None, '<b>', None, '</b><u>', None, '</u>', None]

    # With named markers parse() returns a dict keyed by marker name.
    parsed = loaded.parse('<b>hello</b><u>world</u>')
    assert {'first': '', 'second': 'hello', 'third': 'world', 'fourth': ''} == parsed
class TestTemplateInsertion(unittest.TestCase):
    """Tests for Templater.fill() with ``{{ group.item }}`` placeholders."""

    # One Templater shared by all tests, loaded with a single 'test' group.
    templater = Templater()
    templater.add_templates({
        'test': {
            'this': 'labradoodle',
            'that': 'pug',
            'header': '<header><strong>This is the top</strong></header>',
            'site': 'http://www.example.com'
        }
    })

    def test_replace_at_delimiters_blocks(self):
        # The placeholder is the whole input, with and without inner spaces.
        cases = (
            ('labradoodle', '{{ test.this }}'),
            ('pug', '{{test.that}}'),
        )
        for wanted, source in cases:
            self.assertEqual(wanted, self.templater.fill(source))

    def test_replace_at_delimiters_inline(self):
        # Placeholders embedded in surrounding text.
        cases = (
            ('Replace labradoodle with a dog.',
             'Replace {{ test.this }} with a dog.'),
            ('pug is a dog, as well as labradoodle',
             '{{test.that}} is a dog, as well as {{test.this}}'),
        )
        for wanted, source in cases:
            self.assertEqual(wanted, self.templater.fill(source))

    def test_throw_on_missing_template_group(self):
        # 'missing' was never added as a group.
        source = 'Replace {{ missing.this }} with a dog.'
        with self.assertRaises(Exception):
            self.templater.fill(source)

    def test_throw_on_missing_template_item(self):
        # 'dogdog' is not an item of the 'test' group.
        source = 'Replace {{ test.dogdog }} with a dog.'
        with self.assertRaises(Exception):
            self.templater.fill(source)

    def test_html_insertion(self):
        # The inserted value is itself HTML.
        filled = self.templater.fill('{{ test.header }}')
        self.assertEqual('<header><strong>This is the top</strong></header>',
                         filled)

    def test_non_templates(self):
        # Text without a well-formed placeholder comes back unchanged.
        untouched = '{this is a normal one}, while { { this is not } }, and maybe {{ something without a proper close } }.'
        self.assertEqual(untouched, self.templater.fill(untouched))

    def test_internal_links(self):
        # A placeholder used inside a link target.
        filled = self.templater.fill(
            'Check out my page at [{{test.site}}/thispage/index.html](my website).')
        self.assertEqual(
            'Check out my page at [http://www.example.com/thispage/index.html](my website).',
            filled)
def test_save_should_use_headers_instead_of_self_headers_if_supplied():
    """Headers passed to save() replace the template's own header names."""
    named = Templater(template='{{one}}<u>{{two}}</u>{{three}}',
                      marker=regexp_marker)

    # No marker argument: NAMED_MARKER is formatted with the given headers.
    named.save('test.html', headers=list('abc'))
    default_marker_output = read_file_and_delete('test.html')
    filled_blanks = [NAMED_MARKER.format(name) for name in list('abc')]
    assert named.join(filled_blanks) + '\n' == default_marker_output

    # Explicit marker: it is formatted with the given headers instead.
    named.save('test.html', marker='[--{}--]', headers=list('abc'))
    custom_marker_output = read_file_and_delete('test.html')
    assert '[--a--]<u>[--b--]</u>[--c--]' + '\n' == custom_marker_output
def find_splits(self, column: List[Cell]) -> Iterable[CompoundSplit]:
    """Yield a CompoundSplit for each acceptable sub-column of ``column``.

    A Templater is trained on the "text" of every cell. If the learned
    template has any truthy element, every cell is parsed with it and
    the parsed pieces are transposed into candidate columns. Each
    candidate accepted by ``self.col_is_ok`` is yielded as a
    CompoundSplit of kind "string".

    Nothing is yielded if any cell fails to train the template. A
    failure while parsing or yielding is logged at debug level and ends
    the iteration.

    :param column: cells; each must support ``.get("text", "")``
    :return: generator of CompoundSplit objects
    """
    from templater import Templater

    column = [c.get("text", "") for c in column]
    template = Templater(min_block_size=self.min_block_size)
    for cell in column:
        try:
            template.learn(cell)
        except Exception:
            # Was a bare ``except:``, which also swallowed
            # KeyboardInterrupt and SystemExit.
            log.debug(f"Failed to add {cell} to template")
            return

    if not any(template._template):
        return

    log.debug(f"Template found: {template._template}")
    try:
        newrows = []
        for cell in column:
            newrows.append(map(str.strip, template.parse(cell)))
        # zip(*rows) transposes the per-cell pieces into columns. The old
        # ``if newcols:`` guard is dropped: a zip object is always truthy.
        for i, newcol in enumerate(zip(*newrows)):
            if not self.col_is_ok(newcol):
                continue
            prefix = template._template[i].strip()
            # NOTE(review): the ``else`` is read as pairing with
            # ``if prefix:`` — confirm against the original layout.
            if prefix:
                if prefix.isnumeric():
                    # TODO: check numeric suffix
                    newcol = tuple([prefix + c for c in newcol])
                    prefix = str(i)
            else:
                prefix = str(i)
            yield CompoundSplit(
                prefix, "string", [{"text": c} for c in newcol]
            )
    except Exception:
        log.debug(f"Failed to parse {cell} using template {template._template}")
#!/usr/bin/env python
# coding: utf-8
# Learn a template from two sentences with two different min_block_size
# values and print each resulting template.

from templater import Templater

str_1 = 'my favorite color is blue'
str_2 = 'my favorite color is violet'
# print() calls replace the Python 2 print statements, which are a
# SyntaxError on Python 3.
print('Learning from:')
print(' ', str_1)
print(' ', str_2)

t = Templater()  # default min_block_size = 1
t.learn(str_1)
t.learn(str_2)
print('Template for min_block_size=1 (default):')
print(' ', t._template)

t = Templater(min_block_size=2)
t.learn(str_1)
t.learn(str_2)
print('Template for min_block_size=2:')
print(' ', t._template)
#!/usr/bin/env python
# coding: utf-8
# Learn one template from every file in html/, time the learning phase,
# then parse each file with the learned template.

from time import time
from glob import glob
from templater import Templater

files = glob('html/*.html')  # You must have some .html files in html/
template = Templater()
print('Time to learn')
start = time()
for filename in files:
    # end=' ' keeps the following 'OK' on the same line; a trailing comma
    # inside print(...) does not suppress the newline.
    print(' Learning "%s"...' % filename, end=' ')
    # The context manager closes the file even if learn() raises.
    with open(filename) as fp:
        template.learn(fp.read())
    print('OK')
end = time()
print(' Time:', end - start)

print('Template created:')
print(template._template)

print('Now, work!')
start = time()
for filename in files:
    print(' Parsing "%s"...' % filename)
    with open(filename) as fp:
        print(' Result:', template.parse(fp.read()))
#!/usr/bin/env python
# coding: utf-8
# Build a template from a marker-delimited string and fill its blanks.

from templater import Templater

template = Templater(template='<b>||| and |||</b>', marker='|||')
# print(...) with one argument works on Python 2 and Python 3.
print(template.join(['', 'red', 'blue', '']))  # prints '<b>red and blue</b>'
#!/usr/bin/env python
# coding: utf-8
# Round-trip a learned template through save()/open() and dump()/load().

from os import unlink
from templater import Templater

learned = Templater()
for sample in ('<b>spam</b>', '<b>eggs</b>', '<b>ham</b>'):
    learned.learn(sample)

# 'my-little-template.html' gets the template string with its blanks filled
# by '|||' (save() puts a `\n` at EOF); 'my-template.tpl' gets the pickle of
# the Templater object.
learned.save('my-little-template.html', marker='|||')
learned.dump('my-template.tpl')
print(learned.parse('<b>parsing using first template object</b>'))

# open() removes the `\n`/`\r\n` at the end of the file before creating the
# template definition.
reopened = Templater.open('my-little-template.html', marker='|||')
print(reopened.parse('<b>parsing using second template object</b>'))

unpickled = Templater.load('my-template.tpl')
print(unpickled.parse('<b>parsing using third template object</b>'))

# Removing files:
for leftover in ('my-little-template.html', 'my-template.tpl'):
    unlink(leftover)
#!/usr/bin/env python
# coding: utf-8
# Round-trip a learned template through save()/open() and dump()/load().

from os import unlink
from templater import Templater

t = Templater()
t.learn('<b>spam</b>')
t.learn('<b>eggs</b>')
t.learn('<b>ham</b>')
t.save('my-little-template.html', marker='|||')  # will put a `\n` in the EOF
t.dump('my-template.tpl')
# print(...) with one argument works on Python 2 and Python 3.
print(t.parse('<b>parsing using first template object</b>'))

t2 = Templater.open('my-little-template.html', marker='|||')
# it removes `\n`/`\r\n` in the end of file before creating template definition
print(t2.parse('<b>parsing using second template object</b>'))

t3 = Templater.load('my-template.tpl')
print(t3.parse('<b>parsing using third template object</b>'))

# 'my-little-template.html' will have the template string with blanks filled by
# '|||'
# 'my-template.tpl' will have the pickle of Templater object

# Removing files:
unlink('my-little-template.html')
unlink('my-template.tpl')
#!/usr/bin/env python
# coding: utf-8
# Build a template from a pre-processed block list and fill its blanks.

from templater import Templater

template = Templater(template=[None, '<b>', None, '</b>', None])
# print(...) with one argument works on Python 2 and Python 3.
print(template.join(['', 'Python rules', '']))  # prints '<b>Python rules</b>'
#!/usr/bin/env python
# coding: utf-8
# Learn one template from every file in html/, time the learning phase,
# then parse each file with the learned template.

from time import time
from glob import glob
from templater import Templater

files = glob('html/*.html')  # You must have some .html files in html/
template = Templater()
# print() calls replace the Python 2 print statements, which are a
# SyntaxError on Python 3.
print('Time to learn')
start = time()
for filename in files:
    # end=' ' replaces the Python 2 trailing comma: the following 'OK'
    # stays on the same line.
    print(' Learning "%s"...' % filename, end=' ')
    # The context manager closes the file even if learn() raises.
    with open(filename) as fp:
        template.learn(fp.read())
    print('OK')
end = time()
print(' Time:', end - start)

print('Template created:')
print(template._template)

print('Now, work!')
start = time()
for filename in files:
    print(' Parsing "%s"...' % filename)
    with open(filename) as fp:
        print(' Result:', template.parse(fp.read()))
def test_save_should_use_self_marker_if_no_marker_supplied():
    """save() without a marker reuses the marker given to the constructor."""
    plus_template = Templater(template='+<u>+</u>+', marker='+')
    plus_template.save('test.html')

    written = read_file_and_delete('test.html')
    assert '+<u>+</u>+\n' == written
def test_save_should_use_marker_if_supplied_and_template_hasnt_named_markers():
    """A marker passed to save() replaces the constructor's marker."""
    plus_template = Templater(template='+<u>+</u>+', marker='+')
    plus_template.save('test.html', marker='%%')

    written = read_file_and_delete('test.html')
    assert '%%<u>%%</u>%%\n' == written
def test_should_be_able_to_add_headers_to_a_template_without_named_markers():
    """After add_headers(), parse() returns a dict keyed by those headers."""
    unnamed = Templater(template='|||<u>|||</u>|||', marker='|||')
    unnamed.add_headers(['one', 'two', 'three'])

    parsed = unnamed.parse('a<u>b</u>c')
    assert parsed == {'one': 'a', 'two': 'b', 'three': 'c'}
def test_Templater_should_optionally_import_pre_processed_template():
    """A block list passed as ``template`` is used as the template as-is."""
    blocks = [None, '<u>', None, '</u>', None]
    imported = Templater(template=blocks)

    assert imported._template == blocks
    assert imported.join(['', 'python', '']) == '<u>python</u>'
def test_Templater_should_optionally_import_template_as_string_with_marks():
    """A marker-delimited string is split into the internal block list."""
    from_string = Templater(template='<b>|||</b>', marker='|||')

    blocks = from_string._template
    assert blocks == [None, '<b>', None, '</b>', None]
    assert from_string.join(['', 'spam eggs', '']) == '<b>spam eggs</b>'