def __iter__(self):
    """Iterate over the text as a sequence of word ``Item`` objects.

    ``self._text`` is split with ``self._re`` and the resulting pieces are
    consumed two at a time: the first piece of each pair becomes ``item``
    and both pieces joined together become ``@raw``. Pairs whose joined
    text is empty are skipped.

    When ``self._re`` matches at the very start of the text, the first
    three pieces are folded into a single item (the middle piece being the
    word), so the leading word break stays attached to the first word.

    NOTE(review): this relies on ``self._re.split`` returning the word
    breaks interleaved with the words (e.g. a pattern with one capturing
    group) -- confirm where ``self._re`` is defined.
    """
    leading_break = self._re.match(self._text)
    pieces = self._re.split(self._text)
    if pieces[0] == '':
        # Drop the empty string that split() puts in front of the pieces.
        del pieces[0]

    total = len(pieces)
    first = 0

    if leading_break is not None:
        # Special case: the text starts with a word break, so it is added
        # to the raw text of the first word.
        head = pieces[:3]
        first = 3
        yield Item({
            "item": head[1],
            "type": "word",
            "@raw": ''.join(head),
        })

    # Every remaining pair is (word, following word break).
    for index in range(first, total, 2):
        raw_text = ''.join(pieces[index:index + 2])
        if raw_text == '':
            continue
        yield Item({
            "item": pieces[index],
            "type": "word",
            "@raw": raw_text,
        })
def test_exceptions():
    """Check the errors raised for invalid ``Item`` and ``Schema`` usage.

    The messages are checked on ``exc.value`` (the raised exception) rather
    than on ``exc`` itself.

    NOTE(review): assumes ``raises`` is ``pytest.raises``, whose context
    value is an ``ExceptionInfo`` wrapper; ``str()`` of that wrapper is not
    the exception message.
    """
    # More than one positional argument is rejected.
    with raises(ValueError) as exc:
        Item(None, None)
    assert 'Expected max 1 argument' in str(exc.value)

    # A positional argument cannot be mixed with keyword arguments.
    with raises(ValueError) as exc:
        Item(None, somekeyword=None)
    assert ("Cannot combine both a positional and keyword arguments"
            in str(exc.value))

    # Appending None to a schema is rejected.
    schema = Schema()
    with raises(SchemaError) as exc:
        schema.append(None)
    assert "Wrong type" in str(exc.value)
def test_item():
    """Check length, iteration, ``repr`` and inequality of ``Item``."""
    # An Item built without arguments is empty.
    empty = Item()
    assert len(empty) == 0
    assert repr(empty) == 'Item({})'

    # An Item built from keyword arguments exposes them as key/value pairs.
    filled = Item(a='a_', b='b_')
    assert len(filled) == 2
    for key in filled:
        assert key + '_' == filled[key]

    # Either key order is accepted in the repr.
    accepted_reprs = (
        'Item({"a": "a_", "b": "b_"})',
        'Item({"b": "b_", "a": "a_"})',
    )
    assert repr(filled) in accepted_reprs

    assert empty != filled
def test_roundtrip():
    """Check that a ``Schema`` survives a JSON dump/load round trip.

    Entries are added both as a plain ``dict`` and as an ``Item`` wrapping
    an ``OrderedDict``; every entry must be stored as exactly ``Item``, and
    the schema loaded back from its JSON must equal the original.
    """
    schema = Schema()
    testlen = 1
    # Integer upper bound: a float such as ``1e10`` is rejected by
    # ``random.randint`` on Python 3.12+ (assuming ``randint`` is
    # ``random.randint`` -- confirm against the file's imports).
    upper = 10 ** 10
    for _ in range(testlen):
        schema.append(
            dict(item=random_str(),
                 start=randint(0, upper),
                 end=randint(0, upper)))
        schema.append(
            Item(
                OrderedDict(item=random_str(),
                            start=randint(0, upper),
                            end=randint(0, upper))))

    # list(schema) snapshots the current entries before they are appended
    # again, doubling the schema (2 per iteration -> 4 per iteration).
    schema.extend(list(schema))
    assert len(schema) == testlen * 4
    for item in schema:
        # Exact type check: entries appended as dict must have become Item.
        assert type(item) is Item

    json_ = schema.json()
    assert Schema.loads(json_) == schema

    # The same must hold for a schema rebuilt from JSON.
    schema = Schema.loads(json_)
    for item in schema:
        assert type(item) is Item
def test_encode():
    """Check JSON encoding of ``Item`` and ``Schema`` against ``json.dumps``.

    Covers ``Item.json`` (compact and indented), ``Schema.dump`` called via
    the class and via an instance, ``Schema.json``, ``repr(Schema)`` and the
    ``TypeError`` raised by ``JSONEncoder`` for an unsupported object.
    """
    item = Item(item='word', start=12, end=23)
    itemdict = item._asdict()
    line = json.dumps(itemdict)
    line_formatted = json.dumps(itemdict, indent=2)
    assert item.json() == line
    assert item.json(indent=2) == line_formatted

    # Dumping through the class and through an instance must both produce
    # the single item wrapped in brackets.
    buffer = io.StringIO()
    Schema.dump(Schema([item]), buffer)
    assert ('[%s]' % (item.json(), )) == buffer.getvalue()

    buffer = io.StringIO()
    Schema([item]).dump(buffer)
    assert ('[%s]' % (item.json(), )) == buffer.getvalue()

    schema = Schema()
    schema.append(item)
    schema.append(item)
    # Compare by value: ``is`` tests object identity, which for ints is an
    # implementation detail and triggers a SyntaxWarning on Python 3.8+.
    assert len(schema) == 2
    assert schema.json() == '[%s, %s]' % ((line, ) * 2)
    # NOTE(review): the indent prefix below must match what
    # ``Schema.json(indent=2)`` emits for nested entries -- confirm its
    # width against the original file.
    assert schema.json(indent=2) == '[\n%s,\n%s\n]' % (
        (textwrap.indent(line_formatted, ' '), ) * 2)
    assert repr(schema) == ('Schema(%s)' % (schema.json()))

    class T:
        ok = False

    # An object the encoder does not know must raise TypeError. The message
    # is read from ``exc.value`` (the exception), not from the ``raises``
    # context object (assumed to be ``pytest.raises``).
    with raises(TypeError) as exc:
        assert json.dumps(T(), cls=JSONEncoder)
    assert "is not JSON serializable" in str(exc.value)
from benchmarkstt.input.core import PlainText, File from benchmarkstt.schema import Item, Schema import pytest candide_file = './resources/test/_data/candide.txt' with open(candide_file) as f: candide = f.read() candide_schema = [Item({"item": "\"There", "type": "word", "@raw": "\n\"There "}), Item({"item": "is", "type": "word", "@raw": "is "}), Item({"item": "a", "type": "word", "@raw": "a "}), Item({"item": "concatenation", "type": "word", "@raw": "concatenation "}), Item({"item": "of", "type": "word", "@raw": "of "}), Item({"item": "events", "type": "word", "@raw": "events "}), Item({"item": "in", "type": "word", "@raw": "in "}), Item({"item": "this", "type": "word", "@raw": "this "}), Item({"item": "best", "type": "word", "@raw": "best "}), Item({"item": "of", "type": "word", "@raw": "of "}), Item({"item": "all", "type": "word", "@raw": "all "}), Item({"item": "possible", "type": "word", "@raw": "possible "}), Item({"item": "worlds:", "type": "word", "@raw": "worlds:\n"}), Item({"item": "for", "type": "word", "@raw": "for "}), Item({"item": "if", "type": "word", "@raw": "if "}), Item({"item": "you", "type": "word", "@raw": "you "}), Item({"item": "had", "type": "word", "@raw": "had "}), Item({"item": "not", "type": "word", "@raw": "not "}), Item({"item": "been", "type": "word", "@raw": "been "}), Item({"item": "kicked", "type": "word", "@raw": "kicked "}), Item({"item": "out", "type": "word", "@raw": "out "}), Item({"item": "of", "type": "word", "@raw": "of "}), Item({"item": "a", "type": "word", "@raw": "a "}),
def test_equality():
    """Check equality semantics of ``Schema`` and ``Item``."""
    # A schema loaded from an empty JSON list equals a fresh schema, while
    # a schema holding one item does not.
    loaded_empty = Schema.loads('[]')
    assert loaded_empty == Schema()
    one_item_schema = Schema([Item(item='test')])
    assert one_item_schema != Schema()

    # An Item compares equal to a plain dict with the same content.
    assert Item(item='test') == {'item': 'test'}

    # Key order of the source mapping does not affect equality.
    expected = Item(item='test', item2=55)
    for mapping in (
        {'item': 'test', 'item2': 55},
        {'item2': 55, 'item': 'test'},
    ):
        assert Item(mapping) == expected