def documents(start_index, end_index):
    """Yield one sample document, carrying a single chunk, per index in the range.

    A new ``Document`` is produced for every integer in
    ``range(start_index, end_index)``. The index value itself is never written
    to the document, so every yielded document has the same text and tags.

    :param start_index: first index of the range (inclusive)
    :param end_index: end of the range (exclusive)
    :yield: a freshly built ``Document`` with one chunk appended
    """

    def _sample_document():
        # Parent document with a flat tag and a nested-dict tag.
        parent = Document()
        parent.text = 'this is text'
        parent.tags['id'] = 'id in tags'
        parent.tags['inner_dict'] = {'id': 'id in inner_dict'}

        # Single chunk with its own 'id' tag.
        child = Document()
        child.text = 'text in chunk'
        child.tags['id'] = 'id in chunk tags'
        parent.chunks.append(child)
        return parent

    for _ in range(start_index, end_index):
        yield _sample_document()
def random_docs(num_docs):
    """Yield ``num_docs`` dummy documents whose text is numbered from 1.

    Each document gets the text ``i'm dummy doc <j>`` (``j`` runs from 1 to
    ``num_docs`` inclusive), an ``offset`` of 1000 and an ``'id'`` tag of 1000.

    :param num_docs: how many documents to generate
    :yield: a freshly built ``Document``
    """

    def _dummy_document(j):
        dummy = Document()
        dummy.text = f'i\'m dummy doc {j}'
        dummy.offset = 1000
        # Original author's note on this tag: "this will be ignored".
        dummy.tags['id'] = 1000
        return dummy

    for j in range(1, num_docs + 1):
        yield _dummy_document(j)
def test_data_type_builder_doc(builder, input_data_type, output_data_type):
    """Check what ``_new_doc_from_data`` returns for a document passed through ``builder``.

    :param builder: callable that converts the reference document into the
        value handed to ``_new_doc_from_data``
    :param input_data_type: data type passed to ``_new_doc_from_data``
    :param output_data_type: data type expected back from ``_new_doc_from_data``
    """
    reference = Document()
    reference.id = 'a236cbb0eda62d58'
    reference.text = 'text test'

    payload = builder(reference)
    rebuilt, detected_type = _new_doc_from_data(payload, input_data_type)

    # The id is only compared when the input type is not DataInputType.CONTENT
    # (presumably content input does not carry the original id -- confirm).
    if input_data_type != DataInputType.CONTENT:
        assert rebuilt.id == reference.id
    assert rebuilt.text == reference.text
    assert detected_type == output_data_type
def set_hello(d: Document):
    """Set the text of ``d`` to ``'hello'`` and return the same object.

    :param d: document to modify in place
    :return: ``d`` itself, after its ``text`` has been overwritten
    """
    setattr(d, 'text', 'hello')
    return d
def bar(d: Document):
    """Overwrite ``d.text`` with ``'hello'`` in place and hand ``d`` back.

    :param d: document whose text is replaced
    :return: the same ``d`` object that was passed in
    """
    greeting = 'hello'
    d.text = greeting
    return d