def test_get_attr():
    """Check ``Document.get_attrs`` for flat, ``__``-delimited and missing keys.

    Four scenarios are exercised in sequence:

    1. a mix of plain keys, ``tags__*`` / ``score__*`` keys and keys the test
       expects to come back as ``None``;
    2. requesting ``tags`` as a whole next to a plain field;
    3. a key that descends two levels into ``tags``;
    4. a document built from ``content=<ndarray>``, read back through ``blob``.
    """
    doc = Document(
        {
            'id': '123',
            'text': 'document',
            'feature1': 121,
            'name': 'name',
            'tags': {'id': 'identity', 'a': 'b', 'c': 'd'},
        }
    )
    doc.score = NamedScore(value=42)

    # Requested key -> value the test expects back. ``None`` marks keys that
    # are expected to be reported as ``None``. Note that 'feature1' and 'name'
    # are passed at the top level of the input dict but are asserted under the
    # ``tags__`` prefix.
    expected = {
        'id': '123',
        'text': 'document',
        'tags__name': 'name',
        'tags__feature1': 121,
        'score__value': 42,
        'tags__c': 'd',
        'tags__id': 'identity',
        'tags__inexistant': None,
        'inexistant': None,
    }
    # Unpacking the dict passes its keys in insertion order.
    attrs = doc.get_attrs(*expected)

    # Every requested key must be present in the result, including missing ones.
    assert len(attrs.keys()) == len(expected)
    for key, want in expected.items():
        if want is None:
            assert attrs[key] is None
        else:
            assert attrs[key] == want

    # Scenario 2: 'tags' requested as a whole is compared against ``doc.tags``.
    tags_and_text = doc.get_attrs('tags', 'text')
    assert len(tags_and_text.keys()) == 2
    assert tags_and_text['text'] == 'document'
    assert tags_and_text['tags'] == doc.tags

    # Scenario 3: a key that walks two levels down inside ``tags``.
    nested_doc = Document(
        {
            'id': '123',
            'tags': {'outterkey': {'innerkey': 'real_value'}},
        }
    )
    nested_key = 'tags__outterkey__innerkey'
    nested_attrs = nested_doc.get_attrs(nested_key)
    assert len(nested_attrs.keys()) == 1
    assert nested_attrs[nested_key] == 'real_value'

    # Scenario 4: array content is expected to be retrievable under 'blob'.
    array_doc = Document(content=np.array([1, 2, 3]))
    blob_attrs = array_doc.get_attrs('blob')
    np.testing.assert_equal(blob_attrs['blob'], np.array([1, 2, 3]))
def test_pb_obj2dict():
    """Check that ``get_attrs`` returns text, nested tags and chunk documents.

    A parent document with one chunk is built, then 'text', 'tags' and
    'chunks' are fetched in a single ``get_attrs`` call and inspected.
    """
    # NOTE(review): the ``with`` nesting below is reconstructed from a
    # whitespace-collapsed source (chunk added after its own ``with`` block,
    # still inside the parent's block) - confirm against the original layout.
    parent = Document()
    with parent:
        parent.text = 'this is text'
        parent.tags['id'] = 'id in tags'
        parent.tags['inner_dict'] = {'id': 'id in inner_dict'}
        with Document() as child:
            child.text = 'text in chunk'
            child.tags['id'] = 'id in chunk tags'
        parent.chunks.add(child)

    attrs = parent.get_attrs('text', 'tags', 'chunks')

    assert attrs['text'] == 'this is text'

    # Tags are checked at the top level and one level down.
    tags = attrs['tags']
    assert tags['id'] == 'id in tags'
    assert tags['inner_dict']['id'] == 'id in inner_dict'

    # The chunks are materialized into a list so they can be counted and indexed.
    returned_chunks = list(attrs['chunks'])
    assert len(returned_chunks) == 1
    first_chunk = returned_chunks[0]
    assert isinstance(first_chunk, Document)
    assert first_chunk.text == 'text in chunk'
    assert first_chunk.tags['id'] == 'id in chunk tags'