예제 #1
0
def test_parse_should_return_a_list_with_the_blanks_contents():
    """parse() returns a list holding the content found in each blank."""
    learner = Templater()
    for sample in ('a b d', 'a e d'):
        learner.learn(sample)
    # The assertion expects empty outer blanks and 'b c' in the middle one.
    blanks = learner.parse('a b c d')
    assert blanks == ['', 'b c', '']
예제 #2
0
def test_parse_should_return_a_list_with_the_blanks_contents():
    """parse() returns a list holding the content found in each blank."""
    learner = Templater()
    for sample in ('a b d', 'a e d'):
        learner.learn(sample)
    # The assertion expects empty outer blanks and 'b c' in the middle one.
    blanks = learner.parse('a b c d')
    assert blanks == ['', 'b c', '']
예제 #3
0
def test_join_should_fill_the_blanks_with_elements_received():
    """join() applied to the output of parse() gives back the parsed text."""
    learner = Templater()
    for sample in ('a b d', 'a e d'):
        learner.learn(sample)
    blanks = learner.parse('a b c d')
    rebuilt = learner.join(blanks)
    assert rebuilt == 'a b c d'
예제 #4
0
def test_join_should_fill_the_blanks_with_elements_received():
    """join() applied to the output of parse() gives back the parsed text."""
    learner = Templater()
    for sample in ('a b d', 'a e d'):
        learner.learn(sample)
    blanks = learner.parse('a b c d')
    rebuilt = learner.join(blanks)
    assert rebuilt == 'a b c d'
예제 #5
0
def get_shows_from_html(html_content):
    """Extract the shows described in an HTML page.

    The page is parsed against the template stored in ``src/template.html``
    under ``PROJECT_ROOT``, whose blanks are named with ``{{name}}`` markers.
    The ``info`` blank is split on ``</blockquote>``; each piece has its tags
    removed, is stripped and split into lines, and is then handed to
    ``Show.from_raw_data``.

    Args:
        html_content: HTML text to parse with the template.

    Returns:
        A list with the objects returned by ``Show.from_raw_data``. Pieces
        for which it raises ``ValueError`` are skipped.
    """
    regexp_marker = re.compile(r'{{([a-zA-Z0-9_-]*)}}')
    regexp_tags = re.compile(r'<[^>]*?>')
    template_path = os.path.join(PROJECT_ROOT, 'src', 'template.html')
    # The context manager closes the template file even if reading fails.
    with open(template_path) as template_fp:
        template_text = template_fp.read().strip()
    template = Templater(template_text, marker=regexp_marker)
    data = template.parse(html_content)
    shows = []

    for evento in data['info'].split('</blockquote>'):
        raw_show = regexp_tags.sub('', evento).strip().split('\n')
        try:
            shows.append(Show.from_raw_data(raw_show))
        except ValueError:
            # Best effort: pieces rejected by Show.from_raw_data are ignored.
            continue

    return shows
예제 #6
0
    def find_splits(self, column: List[Cell]) -> Iterable[CompoundSplit]:
        """Split a column into sub-columns using a template learned from it.

        The ``"text"`` value of every cell (``""`` when missing) is fed to a
        ``Templater``. When the learned template has at least one truthy
        element, every cell text is parsed with it, the parsed pieces are
        stripped and the rows are transposed into candidate columns.

        Args:
            column: Cells of one column; each must support ``.get("text", "")``.

        Yields:
            One ``CompoundSplit(prefix, "string", cells)`` per candidate column
            accepted by ``self.col_is_ok``. ``prefix`` is the stripped template
            element at the same index. If that element is empty, the index is
            used as prefix. If it is numeric, it is prepended to every value
            of the column and the index is used as prefix.
        """
        from templater import Templater

        column = [c.get("text", "") for c in column]
        template = Templater(min_block_size=self.min_block_size)
        for cell in column:
            try:
                template.learn(cell)
            except Exception:
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate; a failed learn ends the generator.
                log.debug(f"Failed to add {cell} to template")
                return

        if not any(template._template):
            return

        log.debug(f"Template found: {template._template}")
        try:
            newrows = []
            # Explicit loop so that ``cell`` names the text being parsed when
            # the failure message below is logged.
            for cell in column:
                newrows.append(map(str.strip, template.parse(cell)))
            # zip(*rows) transposes the parsed rows into candidate columns.
            for i, newcol in enumerate(zip(*newrows)):
                if not self.col_is_ok(newcol):
                    continue

                prefix = template._template[i].strip()
                if prefix:
                    if prefix.isnumeric():  # TODO: check numeric suffix
                        newcol = tuple(prefix + c for c in newcol)
                        prefix = str(i)
                else:
                    prefix = str(i)

                yield CompoundSplit(
                    prefix, "string", [{"text": c} for c in newcol]
                )
        except Exception:
            log.debug(f"Failed to parse {cell} using template {template._template}")
예제 #7
0
from time import time
from glob import glob
from templater import Templater


# Learn a template from every HTML file in html/, then parse the same files
# with it, timing both phases.
files = glob('html/*.html')  # You must have some .html files in html/
template = Templater()
print('Time to learn')
start = time()
for filename in files:
    # end=' ' keeps the following 'OK' on the same line as this message.
    print('  Learning "%s"...' % filename, end=' ')
    with open(filename) as fp:
        template.learn(fp.read())
    print('OK')
end = time()
print(' Time:', end - start)

print('Template created:')
print(template._template)

print('Now, work!')
start = time()
for filename in files:
    print('  Parsing "%s"...' % filename)
    # Context manager closes the file even if reading raises.
    with open(filename) as fp:
        content = fp.read()
    print('  Result:', template.parse(content))
end = time()
print(' Time: ', end - start)
예제 #8
0
#!/usr/bin/env python
# coding: utf-8

from os import unlink
from templater import Templater


# Learn a template, persist it in two formats and parse with each object.
TEMPLATE_FILE = 'my-little-template.html'  # template string, blanks as '|||'
PICKLE_FILE = 'my-template.tpl'  # pickle of the Templater object
BLANK_MARKER = '|||'

learned = Templater()
for sample in ('<b>spam</b>', '<b>eggs</b>', '<b>ham</b>'):
    learned.learn(sample)

# save() will put a `\n` in the EOF
learned.save(TEMPLATE_FILE, marker=BLANK_MARKER)
learned.dump(PICKLE_FILE)
print(learned.parse('<b>parsing using first template object</b>'))

# open() removes `\n`/`\r\n` in the end of file before creating the template
# definition
reopened = Templater.open(TEMPLATE_FILE, marker=BLANK_MARKER)
print(reopened.parse('<b>parsing using second template object</b>'))

unpickled = Templater.load(PICKLE_FILE)
print(unpickled.parse('<b>parsing using third template object</b>'))

# Removing files:
for created_file in (TEMPLATE_FILE, PICKLE_FILE):
    unlink(created_file)
예제 #9
0
#!/usr/bin/env python
# coding: utf-8

from os import unlink
from templater import Templater


# Learn a template, persist it in two formats and parse with each object.
t = Templater()
t.learn('<b>spam</b>')
t.learn('<b>eggs</b>')
t.learn('<b>ham</b>')
t.save('my-little-template.html', marker='|||') # will put a `\n` in the EOF
t.dump('my-template.tpl')
# print() with a single argument gives the same output on Python 2 and 3,
# while the former print statement is a SyntaxError on Python 3.
print(t.parse('<b>parsing using first template object</b>'))

t2 = Templater.open('my-little-template.html', marker='|||')
# it removes `\n`/`\r\n` in the end of file before creating template definition
print(t2.parse('<b>parsing using second template object</b>'))

t3 = Templater.load('my-template.tpl')
print(t3.parse('<b>parsing using third template object</b>'))

# 'my-little-template.html' will have the template string with blanks filled by
# '|||'
# 'my-template.tpl' will have the pickle of Templater object

# Removing files:
unlink('my-little-template.html')
unlink('my-template.tpl')
예제 #10
0
#!/usr/bin/env python
# coding: utf-8

from re import compile as re_compile
from templater import Templater

# Named blanks are written as ''{{var}}'' in the template definition.
NAMED_BLANK = re_compile(r'{{([a-zA-Z0-9_-]*)}}')
DEFINITION = '{{first-var}}<b>{{second-var}}</b>{{third-var}}'

template = Templater(DEFINITION, marker=NAMED_BLANK)
# regexp marker also works for Templater.open to specify named markers
parsed = template.parse('This <b> is </b> a test.')  # returns a dict
print(parsed)

template.save('template-with-named-markers.html', marker='{{{{{}}}}}')
예제 #11
0
from time import time
from glob import glob
from templater import Templater


# Learn a template from every HTML file in html/, then parse the same files
# with it, timing both phases. Uses the print() function (Python 3); the
# former print statements are a SyntaxError there.
files = glob('html/*.html') # You must have some .html files in html/
template = Templater()
print('Time to learn')
start = time()
for filename in files:
    # end=' ' replaces the Python 2 trailing comma: 'OK' stays on this line.
    print('  Learning "%s"...' % filename, end=' ')
    with open(filename) as fp:
        template.learn(fp.read())
    print('OK')
end = time()
print(' Time:', end - start)

print('Template created:')
print(template._template)

print('Now, work!')
start = time()
for filename in files:
    print('  Parsing "%s"...' % filename)
    # Context manager closes the file even if reading raises.
    with open(filename) as fp:
        content = fp.read()
    print('  Result:', template.parse(content))
end = time()
print(' Time: ', end - start)
예제 #12
0
#!/usr/bin/env python
# coding: utf-8

from templater import Templater

# Learn a template from three texts, parse one of them and fill the blanks.
samples = (
    '<b> spam and eggs </b>',
    '<b> ham and spam </b>',
    '<b> white and black </b>',
)
template = Templater()
for sample in samples:
    print('Learning "%s"...' % sample)
    template.learn(sample)
print('Template created:', template._template)

# The last learned text doubles as the text to parse.
target = samples[-1]
print('Parsing text "%s"...' % target)
parsed = template.parse(target)
print('  Result:', parsed)
filled = template.join(['', 'yellow', 'blue', ''])
print('Filling the blanks:', filled)
예제 #13
0
def test_should_be_able_to_add_headers_to_a_template_without_named_markers():
    """After add_headers(), parse() returns a dict keyed by the headers."""
    headers = ['one', 'two', 'three']
    templater = Templater(template='|||<u>|||</u>|||', marker='|||')
    templater.add_headers(headers)
    parsed = templater.parse('a<u>b</u>c')
    assert parsed == {'one': 'a', 'two': 'b', 'three': 'c'}
예제 #14
0
def test_should_be_able_to_add_headers_to_a_template_without_named_markers():
    """After add_headers(), parse() returns a dict keyed by the headers."""
    headers = ['one', 'two', 'three']
    templater = Templater(template='|||<u>|||</u>|||', marker='|||')
    templater.add_headers(headers)
    parsed = templater.parse('a<u>b</u>c')
    assert parsed == {'one': 'a', 'two': 'b', 'three': 'c'}
예제 #15
0
#!/usr/bin/env python
# coding: utf-8

from templater import Templater


# Learn a template from three texts, parse one of them and fill the blanks.
# Uses the print() function (Python 3); the former print statements are a
# SyntaxError there.
texts_to_learn = ['<b> spam and eggs </b>', '<b> ham and spam </b>',
                  '<b> white and black </b>']
text_to_parse = texts_to_learn[-1]
template = Templater()
for text in texts_to_learn:
    print('Learning "%s"...' % text)
    template.learn(text)
print('Template created:', template._template)
print('Parsing text "%s"...' % text_to_parse)
print('  Result:', template.parse(text_to_parse))
print('Filling the blanks:', template.join(['', 'yellow', 'blue', '']))
예제 #16
0
#!/usr/bin/env python
# coding: utf-8

from re import compile as re_compile
from templater import Templater


# Build a template whose blanks are named with ''{{var}}'' markers and parse
# a text with it.
regexp_marker = re_compile(r'{{([a-zA-Z0-9_-]*)}}') # match ''{{var}}''
template = Templater('{{first-var}}<b>{{second-var}}</b>{{third-var}}',
                     marker=regexp_marker)
# regexp marker also works for Templater.open to specify named markers
result = template.parse('This <b> is </b> a test.') # returns a dict
# print() with a single argument gives the same output on Python 2 and 3,
# while the former print statement is a SyntaxError on Python 3.
print(result)

template.save('template-with-named-markers.html', marker='{{{{{}}}}}')